+++ /dev/null
-BasedOnStyle: Google
-IndentWidth: 4
-UseTab: Never
----
-Language: Cpp
-Standard: Cpp11
-
-AccessModifierOffset: -4
-AllowAllArgumentsOnNextLine: false
-AllowShortFunctionsOnASingleLine: Empty
-AllowShortLambdasOnASingleLine: Empty
-AlwaysBreakBeforeMultilineStrings: false
-ColumnLimit: 120
-DerivePointerAlignment: false
-FixNamespaceComments: true
-IndentCaseLabels: false
-SpaceBeforeCpp11BracedList: true
-SpaceBeforeCtorInitializerColon: false
----
+++ /dev/null
-# .coveragerc to control coverage.py
-[run]
-branch = True
-
-source =
- mo/
- mo.py
-
-omit =
- # omit anything in a .local directory anywhere
- */.local/*
- # omit everything in /usr
- /usr/*
- # omit tests
- */test_*.py
- # init scripts
- */__init__.py
-
-[report]
-# Regexes for lines to exclude from consideration
-exclude_lines =
- # Have to re-enable the standard pragma
- pragma: no cover
-
- # Don't complain about missing debug-only code:
- def __repr__
-
- # Don't complain if tests don't hit defensive assertion code:
- raise AssertionError
- raise NotImplementedError
-
- # Don't complain if non-runnable code isn't run:
- if 0:
- if __name__ == .__main__.:
-
-ignore_errors = True
-
-[html]
-directory = htmlcov
\ No newline at end of file
# but ensure we don't skip __init__.py
!__init__.py
# developer tools
-.idea
+*.idea
.vscode
cmake-build-debug
cmake-build-release
doc/
docs/build_documentation/work_dir/
inference-engine/plugins/
+inference-engine/temp
+inference-engine/report
.repo/
docs/template_plugin/html/
CMakeLists.txt.user
docs/IE_PLUGIN_DG/html/
+
+*.project
+*.cproject
+*.pydevproject
+*.settings
+*/gen/
+__pycache__
+*.swp
+/config.xml
+
+# Python-specific
+*.env3
+*.pyc
+
+# Tests-specific
+*.coverage
+*htmlcov
+*pylint_report.txt
+*pylint_report_comments.txt
+
+# Artifacts
+/model-optimizer/*.bin
+/model-optimizer/*.xml
+/model-optimizer/*.json
+/model-optimizer/*.so
+/model-optimizer/*.txt
+/model-optimizer/*.pb
+/model-optimizer/*.pbtxt
+/model-optimizer/!CMakeLists.txt
+/model-optimizer/*.mapping
+/model-optimizer/*.dat
+/model-optimizer/*.svg
\ No newline at end of file
cmake_minimum_required(VERSION 3.7.2 FATAL_ERROR)
endif()
-
project(OpenVINO)
set(OpenVINO_MAIN_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
include(features)
# include developer package
-include(developer_package NO_POLICY_SCOPE)
+include(developer_package)
# These options are shared with 3rdparty plugins
# by means of developer package
message (STATUS "PROJECT ............................... " ${PROJECT_NAME})
message (STATUS "CMAKE_BINARY_DIR ...................... " ${CMAKE_BINARY_DIR})
message (STATUS "OpenVINO_MAIN_SOURCE_DIR .............. " ${OpenVINO_MAIN_SOURCE_DIR})
-message (STATUS "IE_MAIN_SOURCE_DIR .............. " ${IE_MAIN_SOURCE_DIR})
+message (STATUS "IE_MAIN_SOURCE_DIR .................... " ${IE_MAIN_SOURCE_DIR})
message (STATUS "CMAKE_GENERATOR ....................... " ${CMAKE_GENERATOR})
message (STATUS "CMAKE_C_COMPILER_ID ................... " ${CMAKE_C_COMPILER_ID})
message (STATUS "CMAKE_BUILD_TYPE ...................... " ${CMAKE_BUILD_TYPE})
if (NOT ANDROID)
ngraph_set(NGRAPH_UNIT_TEST_ENABLE TRUE)
- ngraph_set(NGRAPH_UNIT_TEST_OPENVINO_ENABLE TRUE)
+ ngraph_set(NGRAPH_IE_ENABLE TRUE)
ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE TRUE)
else()
ngraph_set(NGRAPH_UNIT_TEST_ENABLE FALSE)
ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE FALSE)
endif()
- if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ if(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
ie_add_compiler_flags(-Wno-error=uninitialized -Wno-error=literal-conversion)
elseif(UNIX)
ie_add_compiler_flags(-Wno-error=maybe-uninitialized -Wno-error=return-type -fPIC)
endif()
if(ENABLE_AVX512F)
- if ((CMAKE_CXX_COMPILER_ID MATCHES MSVC) AND (MSVC_VERSION VERSION_LESS 1920))
+ if ((CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") AND (MSVC_VERSION VERSION_LESS 1920))
# 1920 version of MSVC 2019. In MSVC 2017 AVX512F not work
set(ENABLE_AVX512F OFF CACHE BOOL "" FORCE)
endif()
- if (CMAKE_CXX_COMPILER_ID MATCHES Clang)
+ if (CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
set(ENABLE_AVX512F OFF CACHE BOOL "" FORCE)
endif()
- if ((CMAKE_CXX_COMPILER_ID STREQUAL GNU) AND (NOT (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9)))
+ if ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND (NOT (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9)))
set(ENABLE_AVX512F OFF CACHE BOOL "" FORCE)
endif()
endif()
set(CMAKE_BUILD_TYPE "Release")
endif()
-set(OUTPUT_ROOT ${OpenVINO_MAIN_SOURCE_DIR})
+# allow overriding the default OUTPUT_ROOT
+if(NOT DEFINED OUTPUT_ROOT)
+ set(OUTPUT_ROOT ${OpenVINO_MAIN_SOURCE_DIR})
+endif()
# Enable postfixes for Debug/Release builds
set(IE_DEBUG_POSTFIX_WIN "d")
# Use solution folders
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
+set(CMAKE_POLICY_DEFAULT_CMP0054 NEW)
+
include(sdl)
-include(os_flags NO_POLICY_SCOPE)
+include(os_flags)
include(sanitizer)
function(set_ci_build_number)
function(enable_fuzzing)
# Enable (libFuzzer)[https://llvm.org/docs/LibFuzzer.html] if supported.
- if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT WIN32)
+ if(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$" AND NOT WIN32)
# Communicate libfuzzer is enabled
set(WITH_LIBFUZZER ON PARENT_SCOPE)
add_compile_definitions(WITH_LIBFUZZER)
#
macro(disable_deprecated_warnings)
if(WIN32)
- if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated "/Qdiag-disable:1478,1786")
- elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(ie_c_cxx_deprecated "/wd4996")
endif()
else()
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated "-diag-disable=1478,1786")
else()
set(ie_c_cxx_deprecated "-Wno-deprecated-declarations")
#
macro(ie_deprecated_no_errors)
if(WIN32)
- if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated "/Qdiag-warning:1478,1786")
- elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(ie_c_cxx_deprecated "/wd4996")
endif()
else()
- if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated_no_errors "-diag-warning=1478,1786")
else()
set(ie_c_cxx_deprecated_no_errors "-Wno-error=deprecated-declarations")
#
function(ie_sse42_optimization_flags flags)
if(WIN32)
- if(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# No such option for MSVC 2019
- elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "/arch:SSE4.2 /QxSSE4.2" PARENT_SCOPE)
else()
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
endif()
else()
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "-msse4.2 -xSSE4.2" PARENT_SCOPE)
else()
set(${flags} "-msse4.2" PARENT_SCOPE)
#
function(ie_avx2_optimization_flags flags)
if(WIN32)
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "/QxCORE-AVX2" PARENT_SCOPE)
- elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(${flags} "/arch:AVX2" PARENT_SCOPE)
else()
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
endif()
else()
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "-march=core-avx2 -xCORE-AVX2 -mtune=core-avx2" PARENT_SCOPE)
else()
set(${flags} "-mavx2 -mfma" PARENT_SCOPE)
#
function(ie_avx512_optimization_flags flags)
if(WIN32)
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "/QxCOMMON-AVX512" PARENT_SCOPE)
- elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(${flags} "/arch:AVX512" PARENT_SCOPE)
else()
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
endif()
else()
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "-xCOMMON-AVX512" PARENT_SCOPE)
endif()
- if(CMAKE_CXX_COMPILER_ID STREQUAL GNU)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(${flags} "-mavx512f -mfma" PARENT_SCOPE)
endif()
endif()
set(CMAKE_RANLIB "gcc-ranlib")
endif()
elseif(WIN32)
- if(CMAKE_BUILD_TYPE STREQUAL Release)
- # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GL")
- # set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /GL")
- # set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LTCG:STATUS")
- # set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /LTCG:STATUS")
- # set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} /LTCG:STATUS")
+ if(CMAKE_BUILD_TYPE STREQUAL "Release")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GL")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /GL")
+ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LTCG:STATUS")
+ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /LTCG:STATUS")
+ set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} /LTCG:STATUS")
endif()
endif()
endmacro()
# to allows to override CMAKE_CXX_STANDARD from command line
if(NOT DEFINED CMAKE_CXX_STANDARD)
- if(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(CMAKE_CXX_STANDARD 14)
else()
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
endif()
-if(COVERAGE)
+if(ENABLE_COVERAGE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --coverage")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --coverage")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE")
if (TREAT_WARNING_AS_ERROR)
- if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
ie_add_compiler_flags(/WX)
ie_add_compiler_flags(/Qdiag-warning:47,1740,1786)
- elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# ie_add_compiler_flags(/WX) # Too many warnings
endif()
endif()
# Disable noisy warnings
- if(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# C4251 needs to have dll-interface to be used by clients of class
ie_add_compiler_flags(/wd4251)
# C4275 non dll-interface class used as base for dll-interface class
ie_add_compiler_flags(/wd4275)
endif()
- if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
# 161 unrecognized pragma
# 177 variable was declared but never referenced
# 556 not matched type of assigned function pointer
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Z7")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Z7")
-
- if(ENABLE_DEBUG_SYMBOLS)
- ie_add_compiler_flags(/Z7)
-
- set(DEBUG_SYMBOLS_LINKER_FLAGS "/DEBUG")
- if (CMAKE_BUILD_TYPE STREQUAL "Release")
- # Keep default /OPT values. See /DEBUG reference for details.
- set(DEBUG_SYMBOLS_LINKER_FLAGS "${DEBUG_SYMBOLS_LINKER_FLAGS} /OPT:REF /OPT:ICF")
- endif()
-
- set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${DEBUG_SYMBOLS_LINKER_FLAGS}")
- set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${DEBUG_SYMBOLS_LINKER_FLAGS}")
- set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${DEBUG_SYMBOLS_LINKER_FLAGS}")
- endif()
else()
# TODO: enable for C sources as well
# ie_add_compiler_flags(-Werror)
set(SANITIZER_LINKER_FLAGS "-fsanitize=address")
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=gold")
- elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT WIN32)
+ elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$" AND NOT WIN32)
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld")
endif()
endif()
if (ENABLE_THREAD_SANITIZER)
- set(SANITIZER_COMPILER_FLAGS "-g -fsanitize=thread")
-
- set(SANITIZER_LINKER_FLAGS "-fsanitize=thread")
+ set(SANITIZER_COMPILER_FLAGS "-g -fsanitize=thread -fno-omit-frame-pointer")
+ set(SANITIZER_LINKER_FLAGS "-fsanitize=thread -static-libsan")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SANITIZER_COMPILER_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SANITIZER_COMPILER_FLAGS}")
if (NOT ENABLE_SANITIZER)
set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -s")
endif()
- elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -fstack-protector-all")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if (NOT ENABLE_SANITIZER)
set(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} -z noexecstack -z relro -z now")
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -z noexecstack -z relro -z now")
endif()
- elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} /sdl")
endif()
set(MSVC64 ON)
endif()
-if(WIN32 AND CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpmachine
OUTPUT_VARIABLE OPENVINO_GCC_TARGET_MACHINE
OUTPUT_STRIP_TRAILING_WHITESPACE)
# Coverage
#
-if(COVERAGE)
+if(ENABLE_COVERAGE)
include(coverage_ie)
endif()
# Generate reports
-ie_coverage_extract(INPUT "dldt" OUTPUT "inference_engine_with_builders"
+ie_coverage_extract(INPUT "dldt" OUTPUT "inference_engine"
PATTERNS "${DLDT_COVERAGE_BASE_DIRECTORY}/inference_engine/*"
"${DLDT_COVERAGE_BASE_DIRECTORY}/plugin_api/*")
-ie_coverage_remove(INPUT "inference_engine_with_builders" OUTPUT "inference_engine"
- PATTERNS "${DLDT_COVERAGE_BASE_DIRECTORY}/inference_engine/builders/*")
ie_coverage_genhtml(INFO_FILE "inference_engine"
PREFIX "${DLDT_COVERAGE_BASE_DIRECTORY}")
+ie_coverage_extract(INPUT "dldt" OUTPUT "inference_engine_ir_reader"
+ PATTERNS "${DLDT_COVERAGE_BASE_DIRECTORY}/ir_readers/*")
+ie_coverage_genhtml(INFO_FILE "inference_engine_ir_reader"
+ PREFIX "${DLDT_COVERAGE_BASE_DIRECTORY}")
+
ie_coverage_extract(INPUT "dldt" OUTPUT "inference_engine_legacy"
PATTERNS "${DLDT_COVERAGE_BASE_DIRECTORY}/legacy_api/*")
ie_coverage_genhtml(INFO_FILE "inference_engine_legacy"
#
if(ENABLE_CPPLINT)
- find_host_package(PythonInterp)
+ find_package(Python3 COMPONENTS Interpreter)
- if(NOT PYTHONINTERP_FOUND)
- message(WARNING "Python interpreter was not found (required for cpplint check)")
+ if(NOT Python3_Interpreter_FOUND)
+ message(WARNING "Python3 interpreter was not found (required for cpplint check)")
set(ENABLE_CPPLINT OFF)
endif()
endif()
message("")
set(gflags_DIR "@gflags_BINARY_DIR@")
+# GNA lib dir
+set(GNA "@GNA@")
# Targets
list(APPEND CMAKE_MODULE_PATH "${IE_MAIN_SOURCE_DIR}/cmake")
# generic stuff from developer package
-include(developer_package NO_POLICY_SCOPE)
+include(developer_package)
include(developer_package_ie)
# Don't threat deprecated API warnings as errors in 3rd party apps
endif()
endif()
-ie_option (ENABLE_IR_READER "Compile with IR readers / parsers" ON)
-
ie_option (ENABLE_VPU "vpu targeted plugins for inference engine" ON)
ie_dependent_option (ENABLE_MYRIAD "myriad targeted plugin for inference engine" ON "ENABLE_VPU" OFF)
ie_option (ENABLE_TESTS "unit, behavior and functional tests" OFF)
-ie_dependent_option (ENABLE_GAPI_TESTS "tests for GAPI kernels" OFF "ENABLE_TESTS" OFF)
+ie_dependent_option (ENABLE_GAPI_TESTS "tests for GAPI kernels" ON "ENABLE_TESTS" OFF)
ie_dependent_option (GAPI_TEST_PERF "if GAPI unit tests should examine performance" OFF "ENABLE_GAPI_TESTS" OFF)
ie_dependent_option (ENABLE_BEH_TESTS "tests oriented to check inference engine API corecteness" ON "ENABLE_TESTS" OFF)
-ie_dependent_option (ENABLE_FUNCTIONAL_TESTS "functional tests" ON "ENABLE_TESTS;ENABLE_IR_READER" OFF)
+ie_dependent_option (ENABLE_FUNCTIONAL_TESTS "functional tests" ON "ENABLE_TESTS" OFF)
ie_dependent_option (ENABLE_SAMPLES "console samples are part of inference engine package" ON "NOT MINGW" OFF)
ie_option (ENABLE_OPENCV "enables OpenCV" ON)
-ie_option (ENABLE_DEBUG_SYMBOLS "generates symbols for debugging" OFF)
-
ie_option (ENABLE_PYTHON "enables ie python bridge build" OFF)
-ie_option (ENABLE_CPP_CCT "enables C++ version of Cross Check Tool" OFF)
-
ie_option (ENABLE_C "enables ie c bridge build" ON)
-ie_dependent_option(ENABLE_CPPLINT "Enable cpplint checks during the build" OFF "UNIX;NOT APPLE;NOT ANDROID" OFF)
+ie_dependent_option(ENABLE_CPPLINT "Enable cpplint checks during the build" ON "UNIX;NOT APPLE;NOT ANDROID" OFF)
+
ie_dependent_option(ENABLE_CPPLINT_REPORT "Build cpplint report instead of failing the build" OFF "ENABLE_CPPLINT" OFF)
-ie_option(ENABLE_CLANG_FORMAT "Enable clang-format checks during the build" OFF)
+ie_option(ENABLE_CLANG_FORMAT "Enable clang-format checks during the build" ON)
set(IE_EXTRA_PLUGINS "" CACHE STRING "Extra paths for plugins to include into DLDT build tree")
endfunction()
function(add_lfs_repo name prefix url tag)
+ if(TARGET ${name})
+ return()
+ endif()
+
ExternalProject_Add(${name}
PREFIX ${prefix}
GIT_REPOSITORY ${url}
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
IMPORTED_LOCATION "${IE${ie_library_usuffix}_RELEASE_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}")
- if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
INTERFACE_COMPILE_OPTIONS "-diag-warning=1786")
else()
const char *fileName = ep->d_name;
if (strcmp(fileName, ".") == 0 || strcmp(fileName, "..") == 0) continue;
char *file_path = (char *)calloc(strlen(arg) + strlen(ep->d_name) + 2, sizeof(char));
- strcpy(file_path, arg);
- strcat(file_path, "/");
- strcat(file_path, ep->d_name);
+ memcpy(file_path, arg, strlen(arg));
+ memcpy(file_path + strlen(arg), "/", strlen("/"));
+ memcpy(file_path + strlen(arg) + strlen("/"), ep->d_name, strlen(ep->d_name) + 1);
if (file_num == 0) {
file_paths = (char **)calloc(1, sizeof(char *));
dp = NULL;
} else {
char *file_path = (char *)calloc(strlen(arg) + 1, sizeof(char));
- strcpy(file_path, arg);
+ memcpy(file_path, arg, strlen(arg) + 1);
if (file_num == 0) {
file_paths = (char **)calloc(1, sizeof(char *));
}
if (fscanf(file, "%s", key)!= EOF && fscanf(file, "%s", value) != EOF) {
char *cfg_name = (char *)calloc(strlen(key) + 1, sizeof(char));
char *cfg_value = (char *)calloc(strlen(value) + 1, sizeof(char));
- strcpy(cfg_name, key);
- strcpy(cfg_value, value);
+ memcpy(cfg_name, key, strlen(key) + 1);
+ memcpy(cfg_value, value, strlen(value) + 1);
ie_config_t *cfg_t = (ie_config_t *)calloc(1, sizeof(ie_config_t));
cfg_t->name = cfg_name;
cfg_t->value = cfg_value;
}
char *cfg_name = (char *)calloc(strlen(key) + 1, sizeof(char));
char *cfg_value = (char *)calloc(strlen(value) + 1, sizeof(char));
- strcpy(cfg_name, key);
- strcpy(cfg_value, value);
+ memcpy(cfg_name, key, strlen(key) + 1);
+ memcpy(cfg_value, value, strlen(value) + 1);
ie_config_t *cfg_t = (ie_config_t *)calloc(1, sizeof(ie_config_t));
cfg_t->name = cfg_name;
cfg_t->value = cfg_value;
// --------------------------- 4. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
input_weight = (char *)calloc(strlen(input_model) + 1, sizeof(char));
- strncpy(input_weight, input_model, strlen(input_model)-4);
- strcat(input_weight, ".bin");
+ memcpy(input_weight, input_model, strlen(input_model) - 4);
+ memcpy(input_weight + strlen(input_model) - 4, ".bin", strlen(".bin") + 1);
printf("%sLoading network files:\n", info);
printf("\t%s\n", input_model);
printf("\t%s\n", input_weight);
char str_num[16] = {0};
int2str(str_num, batch_id);
char *img_path = (char *)calloc(strlen(out) + strlen(str_num) + strlen(".bmp") + 1, sizeof(char));
- strcpy(img_path, out);
- strcat(img_path, str_num);
- strcat(img_path, ".bmp");
+ memcpy(img_path, out, strlen(out));
+ memcpy(img_path + strlen(out), str_num, strlen(str_num));
+ memcpy(img_path + strlen(out) + strlen(str_num), ".bmp", strlen(".bmp") + 1);
image_save(img_path, &originalImages[batch_id]);
printf("%sImage %s created!\n", info, img_path);
free(img_path);
DATA_PATH=\"${DATA_PATH}\"
MODELS_PATH=\"${MODELS_PATH}\" )
+add_dependencies(${TARGET_NAME} MultiDevicePlugin)
+
if(ENABLE_MKL_DNN)
add_dependencies(${TARGET_NAME} MKLDNNPlugin)
endif()
IE_ASSERT_OK(ie_core_create("", &core));
ASSERT_NE(nullptr, core);
- ie_core_versions_t ie_core_versions_multi;
ie_param_t param;
- if (ie_core_get_versions(core, "MULTI", &ie_core_versions_multi) != IEStatusCode::OK ||
-    ie_core_get_metric(core, "GPU", "AVAILABLE_DEVICES", &param) != IEStatusCode::OK) {
+    if (ie_core_get_metric(core, "GPU", "AVAILABLE_DEVICES", &param) != IEStatusCode::OK) {
ie_core_free(&core);
GTEST_SKIP();
}
ie_config_t config_param = {"MULTI_DEVICE_PRIORITIES", "GPU,CPU", nullptr};
IE_EXPECT_OK(ie_exec_network_set_config(exe_network, &config_param));
- ie_core_versions_free(&ie_core_versions_multi);
-    ie_param_free(&param);
ie_exec_network_free(&exe_network);
ie_network_free(&network);
ie_core_free(&core);
+    ie_param_free(&param);
}
TEST(ie_exec_network_get_metric, getMetric) {
set (PYTHON_BRIDGE_SRC_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
add_subdirectory (src/openvino/inference_engine)
+# Check Cython version
+if("${CYTHON_VERSION}" VERSION_LESS "0.29")
+  message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found version ${CYTHON_VERSION}")
+else()
+ message(STATUS "Found Cython version ${CYTHON_VERSION}")
+endif()
+
# install
ie_cpack_add_component(${PYTHON_VERSION} REQUIRED)
include( FindPackageHandleStandardArgs )
FIND_PACKAGE_HANDLE_STANDARD_ARGS( Cython REQUIRED_VARS CYTHON_EXECUTABLE )
-mark_as_advanced( CYTHON_EXECUTABLE )
+# Find Cython version
+execute_process(COMMAND ${CYTHON_EXECUTABLE} -V ERROR_VARIABLE CYTHON_OUTPUT OUTPUT_QUIET)
+string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+\\.[0-9]+).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}")
+
+mark_as_advanced( CYTHON_EXECUTABLE CYTHON_VERSION )
# net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
# input_layer = next(iter(net.inputs))
# n, c, h, w = net.inputs[input_layer]
- # net.reshape({input_layer: (n, c, h*2, w*2)}]
+ # net.reshape({input_layer: (n, c, h*2, w*2)})
# ```
def reshape(self, input_shapes: dict):
cdef map[string, vector[size_t]] c_input_shapes;
import os
import pytest
import warnings
+import threading
from openvino.inference_engine import ie_api as ie
from conftest import model_path, image_path
class InferReqWrap:
def __init__(self, request):
self.request = request
+ self.cv = threading.Condition()
self.request.set_completion_callback(self.callback)
self.status_code = self.request.wait(ie.WaitMode.STATUS_ONLY)
assert self.status_code == ie.StatusCode.INFER_NOT_STARTED
def callback(self, statusCode, userdata):
self.status_code = self.request.wait(ie.WaitMode.STATUS_ONLY)
+ self.cv.acquire()
+ self.cv.notify()
+ self.cv.release()
def execute(self, input_data):
self.request.async_infer(input_data)
+ self.cv.acquire()
+ self.cv.wait()
+ self.cv.release()
status = self.request.wait(ie.WaitMode.RESULT_READY)
assert status == ie.StatusCode.OK
assert self.status_code == ie.StatusCode.OK
*/
explicit CNNNetwork(std::shared_ptr<ICNNNetwork> network): network(network) {
actual = network.get();
- if (actual == nullptr) {
- THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
- }
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
}
/**
* @return outputs Reference to the OutputsDataMap object
*/
virtual OutputsDataMap getOutputsInfo() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
OutputsDataMap outputs;
actual->getOutputsInfo(outputs);
return outputs;
* @return inputs Reference to InputsDataMap object
*/
virtual InputsDataMap getInputsInfo() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
InputsDataMap inputs;
actual->getInputsInfo(inputs);
return inputs;
* @return The number of layers as an integer value
*/
size_t layerCount() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return actual->layerCount();
}
*
* @return Network name
*/
- const std::string& getName() const noexcept {
+ const std::string& getName() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return actual->getName();
}
* @return The size of batch as a size_t value
*/
virtual size_t getBatchSize() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return actual->getBatchSize();
}
*
* @return A shared pointer of the current network
*/
- operator std::shared_ptr<ICNNNetwork>() {
+ operator ICNNNetwork::Ptr() {
return network;
}
* @return An instance of the current network
*/
operator ICNNNetwork&() {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return *actual;
}
* @return A const reference of the current network
*/
operator const ICNNNetwork&() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return *actual;
}
*
* @return constant nGraph function
*/
- std::shared_ptr<ngraph::Function> getFunction() noexcept {
+ std::shared_ptr<ngraph::Function> getFunction() {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return actual->getFunction();
}
*
* @return constant nGraph function
*/
- std::shared_ptr<const ngraph::Function> getFunction() const noexcept {
+ std::shared_ptr<const ngraph::Function> getFunction() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return actual->getFunction();
}
* @return Map of pairs: input name and its dimension.
*/
virtual ICNNNetwork::InputShapes getInputShapes() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
ICNNNetwork::InputShapes shapes;
InputsDataMap inputs;
actual->getInputsInfo(inputs);
* @return A vector of Memory State objects
*/
std::vector<MemoryState> QueryState() {
- if (actual == nullptr) {
- THROW_IE_EXCEPTION << "ExecutableNetwork wrapper was not initialized.";
- }
+ if (actual == nullptr) THROW_IE_EXCEPTION << "ExecutableNetwork was not initialized.";
IMemoryState::Ptr pState = nullptr;
auto res = OK;
std::vector<MemoryState> controller;
InferenceEngine::details::SharedObjectLoader::Ptr splg = {}):
actual(request), plg(splg) {
// plg can be null, but not the actual
- if (actual == nullptr) {
- THROW_IE_EXCEPTION << "InferRequest wrapper was not initialized.";
- }
+ if (actual == nullptr) THROW_IE_EXCEPTION << "InferRequest was not initialized.";
}
/**
*/
StatusCode Wait(int64_t millis_timeout) {
ResponseDesc resp;
- if (actual == nullptr) {
- THROW_IE_EXCEPTION << "InferRequest wrapper was not initialized.";
- }
+ if (actual == nullptr) THROW_IE_EXCEPTION << "InferRequest was not initialized.";
auto res = actual->Wait(millis_timeout, &resp);
if (res != OK && res != RESULT_NOT_READY && res != INFER_NOT_STARTED) {
InferenceEngine::details::extract_exception(res, resp.msg);
* @return A shared pointer to underlying IInferRequest interface
*/
operator IInferRequest::Ptr&() {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "InferRequest was not initialized.";
return actual;
}
const Version* GetVersion() {
const Version* versionInfo = nullptr;
IE_SUPPRESS_DEPRECATED_START
+ if (actual == nullptr) THROW_IE_EXCEPTION << "InferencePlugin wrapper was not initialized";
actual->GetVersion(versionInfo);
IE_SUPPRESS_DEPRECATED_END
if (versionInfo == nullptr) {
void QueryNetwork(const ICNNNetwork& network, const std::map<std::string, std::string>& config,
QueryNetworkResult& res) const {
IE_SUPPRESS_DEPRECATED_START
+ if (actual == nullptr) THROW_IE_EXCEPTION << "InferencePlugin wrapper was not initialized";
actual->QueryNetwork(network, config, res);
IE_SUPPRESS_DEPRECATED_END
if (res.rc != OK) THROW_IE_EXCEPTION << res.resp.msg;
* scope.
*/
explicit CNNNetworkIterator(const ICNNNetwork* network) {
+ if (network == nullptr) THROW_IE_EXCEPTION << "ICNNNetwork object is nullptr";
InputsDataMap inputs;
network->getInputsInfo(inputs);
if (!inputs.empty()) {
* @brief The main constructor
* @param loader Library to load from
*/
- explicit SymbolLoader(std::shared_ptr<Loader> loader): _so_loader(loader) {}
+ explicit SymbolLoader(std::shared_ptr<Loader> loader): _so_loader(loader) {
+ if (_so_loader == nullptr) {
+ THROW_IE_EXCEPTION << "SymbolLoader cannot be created with nullptr";
+ }
+ }
/**
* @brief Calls a function from the library that creates an object and returns StatusCode
StatusCode rc = OK;
/**
- * @brief Response mssage
+ * @brief Response message
*/
ResponseDesc resp;
};
* @param xmlConfigFile A path to .xml file with plugins to load from. If XML configuration file is not specified,
* then default Inference Engine plugins are loaded from the default plugin.xml file.
*/
- explicit Core(const std::string& xmlConfigFile = std::string());
+ explicit Core(const std::string& xmlConfigFile = {});
/**
* @brief Returns plugins version information
* if bin file with the same name was not found, will load IR without weights.
* @return CNNNetwork
*/
- CNNNetwork ReadNetwork(const std::wstring& modelPath, const std::wstring& binPath = std::wstring()) const {
+ CNNNetwork ReadNetwork(const std::wstring& modelPath, const std::wstring& binPath = {}) const {
return ReadNetwork(details::wStringtoMBCSstringChar(modelPath), details::wStringtoMBCSstringChar(binPath));
}
#endif
* if bin file with the same name was not found, will load IR without weights.
* @return CNNNetwork
*/
- CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath = "") const;
+ CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath = {}) const;
/**
* @brief Reads IR xml and bin (with the same name) files
* @param model string with IR
* @return An executable network reference
*/
ExecutableNetwork LoadNetwork(
- const CNNNetwork network, const std::string& deviceName,
- const std::map<std::string, std::string>& config = std::map<std::string, std::string>());
+ const CNNNetwork& network, const std::string& deviceName,
+ const std::map<std::string, std::string>& config = {});
/**
* @brief Registers extension
* @param context Pointer to RemoteContext object
* @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load
* operation
- * @return An executable network reference
+ * @return An executable network object
*/
ExecutableNetwork LoadNetwork(
- const CNNNetwork network, RemoteContext::Ptr context,
- const std::map<std::string, std::string>& config = std::map<std::string, std::string>());
+ const CNNNetwork& network, RemoteContext::Ptr context,
+ const std::map<std::string, std::string>& config = {});
/**
* @brief Registers extension for the specified plugin
*/
ExecutableNetwork ImportNetwork(
const std::string& modelFileName, const std::string& deviceName,
- const std::map<std::string, std::string>& config = std::map<std::string, std::string>());
+ const std::map<std::string, std::string>& config = {});
/**
* @brief Creates an executable network from a previously exported network
* @param deviceName A name of a device to query
* @param network Network object to query
* @param config Optional map of pairs: (config parameter name, config parameter value)
- * @return Pointer to the response message that holds a description of an error if any occurred
+ * @return An object containing a map of pairs a layer name -> a device name supporting this layer.
*/
QueryNetworkResult QueryNetwork(
const ICNNNetwork& network, const std::string& deviceName,
- const std::map<std::string, std::string>& config = std::map<std::string, std::string>()) const;
+ const std::map<std::string, std::string>& config = {}) const;
/**
* @brief Sets configuration for device, acceptable keys can be found in ie_plugin_config.hpp
*
* @param config Map of pairs: (config parameter name, config parameter value)
*/
- void SetConfig(const std::map<std::string, std::string>& config, const std::string& deviceName = std::string());
+ void SetConfig(const std::map<std::string, std::string>& config, const std::string& deviceName = {});
/**
* @brief Gets configuration dedicated to device behaviour.
project(Samples)
+if(POLICY CMP0054)
+ cmake_policy(SET CMP0054 NEW)
+endif()
+
if(POLICY CMP0063)
cmake_policy(SET CMP0063 NEW)
endif()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") #treating warnings as errors
endif ()
- if (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251 /wd4275 /wd4267") #disable some warnings
endif()
else()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=unused-command-line-argument")
elseif(UNIX)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wuninitialized -Winit-self")
- if(NOT CMAKE_CXX_COMPILER_ID STREQUAL Clang)
+ if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wmaybe-uninitialized")
endif()
endif()
set (CMAKE_CXX_STANDARD 11)
set (CMAKE_CXX_EXTENSIONS OFF)
set (CMAKE_CXX_STANDARD_REQUIRED ON)
- if (CMAKE_CXX_COMPILER_ID STREQUAL GNU)
+ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
endif()
endif()
set_target_properties(gflags_nothreads_static PROPERTIES FOLDER thirdparty)
endif()
-if (CMAKE_CXX_COMPILER_ID STREQUAL GNU)
+if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
endif()
if(COMMAND add_cpplint_target AND NOT IE_SAMPLE_EXCLUDE_CPPLINT)
if(folder_name STREQUAL "c_samples")
- set(custom_filters "-readability/casting,-runtime/printf")
+ set(custom_filters "-readability/casting")
endif()
add_cpplint_target(${IE_SAMPLE_NAME}_cpplint FOR_TARGETS ${IE_SAMPLE_NAME}
CUSTOM_FILTERS ${custom_filters})
-stream_output Optional. Print progress as a plain text. When specified, an interactive progress bar is replaced with a multiline output.
-t Optional. Time in seconds to execute topology.
  -progress Optional. Show progress bar (can affect performance measurement). Default value is "false".
+ -shape Optional. Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size.
CPU-specific performance options:
-nstreams "<integer>" Optional. Number of streams to use for inference on the CPU or/and GPU in throughput mode
static const char dump_config_message[] = "Optional. Path to XML/YAML/JSON file to dump IE parameters, which were set by application.";
#endif
+static const char shape_message[] = "Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or \"[1,3,224,224]\""
+ " in case of one input size.";
+
/// @brief Define flag for showing help message <br>
DEFINE_bool(h, false, help_message);
DEFINE_string(dump_config, "", dump_config_message);
#endif
+/// @brief Define flag for input shape <br>
+DEFINE_string(shape, "", shape_message);
+
/**
* @brief This function show a help message
*/
std::cout << " -stream_output " << stream_output_message << std::endl;
std::cout << " -t " << execution_time_message << std::endl;
std::cout << " -progress " << progress_message << std::endl;
+ std::cout << " -shape " << shape_message << std::endl;
std::cout << std::endl << " device-specific performance options:" << std::endl;
std::cout << " -nstreams \"<integer>\" " << infer_num_streams_message << std::endl;
std::cout << " -nthreads \"<integer>\" " << infer_num_threads_message << std::endl;
// ----------------- 5. Resizing network to match image sizes and given batch ----------------------------------
next_step();
-
batchSize = cnnNetwork.getBatchSize();
+ // Parse input shapes if specified
+ InferenceEngine::ICNNNetwork::InputShapes shapes = cnnNetwork.getInputShapes();
+ bool reshape = false;
+ if (!FLAGS_shape.empty()) {
+ reshape |= updateShapes(shapes, FLAGS_shape, inputInfo);
+ }
if ((FLAGS_b != 0) && (batchSize != FLAGS_b)) {
- ICNNNetwork::InputShapes shapes = cnnNetwork.getInputShapes();
- bool reshape = false;
- for (const InputsDataMap::value_type& item : inputInfo) {
- auto layout = item.second->getTensorDesc().getLayout();
-
- int batchIndex = -1;
- if ((layout == Layout::NCHW) || (layout == Layout::NCDHW) ||
- (layout == Layout::NHWC) || (layout == Layout::NDHWC) ||
- (layout == Layout::NC)) {
- batchIndex = 0;
- } else if (layout == CN) {
- batchIndex = 1;
- }
- if ((batchIndex != -1) && (shapes[item.first][batchIndex] != FLAGS_b)) {
- shapes[item.first][batchIndex] = FLAGS_b;
- reshape = true;
- }
- }
- if (reshape) {
- slog::info << "Resizing network to batch = " << FLAGS_b << slog::endl;
- cnnNetwork.reshape(shapes);
- }
+ reshape |= adjustShapesBatch(shapes, FLAGS_b, inputInfo);
+ }
+ if (reshape) {
+ slog::info << "Reshaping network: " << getShapesString(shapes) << slog::endl;
+ startTime = Time::now();
+ cnnNetwork.reshape(shapes);
+ auto duration_ms = double_to_string(get_total_ms_time(startTime));
+ slog::info << "Reshape network took " << duration_ms << " ms" << slog::endl;
+ if (statistics)
+ statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+ {
+ {"reshape network time (ms)", duration_ms}
+ });
}
-
batchSize = cnnNetwork.getBatchSize();
topology_name = cnnNetwork.getName();
slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize << slog::endl;
#include <utility>
#include <vector>
#include <map>
+#include <regex>
#include <samples/common.hpp>
#include <samples/slog.hpp>
return result;
}
+// Sets the batch dimension of every input shape in `shapes` to `batch_size`.
+// The batch axis is located from each input's layout: index 0 for the
+// N-first layouts (NCHW/NCDHW/NHWC/NDHWC/NC), index 1 for CN; inputs with any
+// other layout are left untouched.
+// Returns true if at least one shape entry was actually modified.
+bool adjustShapesBatch(InferenceEngine::ICNNNetwork::InputShapes& shapes,
+                       const size_t batch_size, const InferenceEngine::InputsDataMap& input_info) {
+    bool updated = false;
+    for (auto& item : input_info) {
+        auto layout = item.second->getTensorDesc().getLayout();
+
+        // -1 means "no batch dimension for this layout" — skip the input below.
+        int batch_index = -1;
+        if ((layout == InferenceEngine::Layout::NCHW) || (layout == InferenceEngine::Layout::NCDHW) ||
+            (layout == InferenceEngine::Layout::NHWC) || (layout == InferenceEngine::Layout::NDHWC) ||
+            (layout == InferenceEngine::Layout::NC)) {
+            batch_index = 0;
+        } else if (layout == InferenceEngine::Layout::CN) {
+            batch_index = 1;
+        }
+        // Only touch the map (and report an update) when the value really changes;
+        // .at() also validates that the input name/axis exists in `shapes`.
+        if ((batch_index != -1) && (shapes.at(item.first).at(batch_index) != batch_size)) {
+            shapes[item.first][batch_index] = batch_size;
+            updated = true;
+        }
+    }
+    return updated;
+}
+
+// Parses a command-line shape specification of the form
+// "input1[1,3,224,224],input2[1,4]" and applies it to `shapes`.
+// A bracketed shape with no preceding name ("[1,3,224,224]") is applied to
+// every input listed in `input_info`.
+// Returns true if any shape was written; throws std::logic_error when the
+// string is not fully consumed (i.e. malformed syntax).
+bool updateShapes(InferenceEngine::ICNNNetwork::InputShapes& shapes,
+                  const std::string shapes_string, const InferenceEngine::InputsDataMap& input_info) {
+    bool updated = false;
+    // Work on a copy that is progressively trimmed as "name[dims]" groups are consumed.
+    std::string search_string = shapes_string;
+    auto start_pos = search_string.find_first_of('[');
+    while (start_pos != std::string::npos) {
+        auto end_pos = search_string.find_first_of(']');
+        if (end_pos == std::string::npos)
+            break;
+        // Everything before '[' is the (possibly empty) input name;
+        // everything between the brackets is a comma-separated dim list.
+        auto input_name = search_string.substr(0, start_pos);
+        auto input_shape = search_string.substr(start_pos + 1, end_pos - start_pos - 1);
+        std::vector<size_t> parsed_shape;
+        for (auto& dim : split(input_shape, ',')) {
+            parsed_shape.push_back(std::stoi(dim));
+        }
+        if (!input_name.empty()) {
+            shapes[input_name] = parsed_shape;
+            updated = true;
+        } else {
+            // Anonymous shape: broadcast it to all known inputs.
+            for (auto& item : input_info) {
+                shapes[item.first] = parsed_shape;
+            }
+            updated = true;
+        }
+        // Drop the consumed group; groups must be separated by a single ','.
+        search_string = search_string.substr(end_pos + 1);
+        if (search_string.empty() || search_string.front() != ',')
+            break;
+        search_string = search_string.substr(1);
+        start_pos = search_string.find_first_of('[');
+    }
+    // Leftover text means the loop bailed out mid-parse — reject the whole spec.
+    if (!search_string.empty())
+        throw std::logic_error("Can't parse `shape` parameter: " + shapes_string);
+    return updated;
+}
+
+// Formats an input-shapes map as a human-readable string for logging, e.g.
+//   'input1': [1, 3, 224, 224], 'input2': [1, 4]
+std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes) {
+    std::stringstream ss;
+    for (auto& shape : shapes) {
+        // Separate map entries with ", " (skipped before the first entry).
+        if (!ss.str().empty()) ss << ", ";
+        ss << "\'" << shape.first << "': [";
+        for (size_t i = 0; i < shape.second.size(); i++) {
+            if (i > 0) ss << ", ";
+            ss << shape.second.at(i);
+        }
+        ss << "]";
+    }
+    return ss.str();
+}
+
#ifdef USE_OPENCV
void dump_config(const std::string& filename,
const std::map<std::string, std::map<std::string, std::string>>& config) {
uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device);
std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices,
const std::string& values_string);
+bool updateShapes(InferenceEngine::ICNNNetwork::InputShapes& shapes,
+ const std::string shapes_string, const InferenceEngine::InputsDataMap& input_info);
+bool adjustShapesBatch(InferenceEngine::ICNNNetwork::InputShapes& shapes,
+ const size_t batch_size, const InferenceEngine::InputsDataMap& input_info);
+std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes);
+
#ifdef USE_OPENCV
void dump_config(const std::string& filename,
const std::map<std::string, std::map<std::string, std::string>>& config);
--- /dev/null
+@echo off
+:: Copyright (C) 2018-2020 Intel Corporation
+:: SPDX-License-Identifier: Apache-2.0
+
+:: Configures an out-of-source "vs2017x64" build two levels above this script
+:: using the VS 2017 generator with the Intel C++ Compiler 18.0 toolset, then
+:: converts selected mkldnn/plugin/test .vcxproj files to Intel C++ projects
+:: with ICProjConvert180.exe.
+:: NOTE(review): assumes a fixed 32-bit ICProjConvert install path — verify on
+:: the target machine.
+
+pushd ..\..
+:: Create the build directory if it does not exist yet.
+if not exist "vs2017x64" (
+    mkdir "vs2017x64"
+)
+
+cmake -E chdir "vs2017x64" cmake -G "Visual Studio 15 2017 Win64" -T "Intel C++ Compiler 18.0" -DOS_FOLDER=ON ^
+    -DENABLE_MYRIAD=OFF -DENABLE_VPU=OFF -DENABLE_GNA=ON -DENABLE_CLDNN=OFF ^
+    -DENABLE_OPENCV=ON -DENABLE_MKL_DNN=ON ^
+    -DVERBOSE_BUILD=ON -DENABLE_TESTS=ON -DTHREADING=TBB ..
+
+
+:: Bare "chdir" prints the current directory — used as a progress breadcrumb.
+chdir
+cd "vs2017x64\thirdparty\"
+"C:\Program Files (x86)\Common Files\Intel\shared files\ia32\Bin\ICProjConvert180.exe" mkldnn.vcxproj /IC
+
+chdir
+cd "..\src\mkldnn_plugin"
+"C:\Program Files (x86)\Common Files\Intel\shared files\ia32\Bin\ICProjConvert180.exe" MKLDNNPlugin.vcxproj /IC
+"C:\Program Files (x86)\Common Files\Intel\shared files\ia32\Bin\ICProjConvert180.exe" test_MKLDNNPlugin.vcxproj /IC
+
+chdir
+cd "..\..\tests\unit"
+"C:\Program Files (x86)\Common Files\Intel\shared files\ia32\Bin\ICProjConvert180.exe" InferenceEngineUnitTests.vcxproj /IC
+
+
+popd
+pause
#!/bin/bash
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
CURRENT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
command -v realpath >/dev/null 2>&1 || { echo >&2 "cpplint require realpath executable but it's not installed. Aborting."; exit 1; }
--- /dev/null
+#!/bin/bash
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Runs several MyriadFunctionalTests binaries in parallel and summarizes results.
+#   $1 - number of applications to run in parallel (default: 4)
+#   $2 - optional directory containing the test binaries
+# Exits 1 if any parallel task failed, 0 otherwise.
+
+APP_NAME="MyriadFunctionalTests"
+APPS_TO_RUN=$1
+# Default to 4 parallel applications when $1 is not supplied.
+APPS_TO_RUN=${APPS_TO_RUN:=4}
+
+echo "Run in parallel ${APPS_TO_RUN} applications"
+
+TEST_DIR=../../bin/intel64
+
+# Path to test dir is provided
+if [[ -n "$2" ]]; then
+    TEST_DIR=$2
+# Search for test dir with binaries
+else
+    # Windows default
+    if [[ -f "${TEST_DIR}/${APP_NAME}" ]]; then
+        TEST_DIR=${TEST_DIR}
+    # Search for Release or Debug config
+    elif [[ -f "${TEST_DIR}/Release/${APP_NAME}" ]]; then
+        TEST_DIR="$TEST_DIR/Release/"
+    elif [[ -f "${TEST_DIR}/Debug/${APP_NAME}" ]]; then
+        TEST_DIR="$TEST_DIR/Debug/"
+    else
+        echo "Directory with binaries not found!"
+        # NOTE(review): "exit -1" is non-portable (wraps to status 255); consider "exit 1".
+        exit -1
+    fi
+
+fi
+
+echo "Test directory: ${TEST_DIR}"
+cd ${TEST_DIR}
+
+# Presumably switches the VPU plugin to MyriadX devices — TODO confirm.
+export IE_VPU_MYRIADX=1
+
+# PIDs of the background test processes, collected for the wait loop below.
+pids=""
+
+if [[ "${APPS_TO_RUN}" -ge 1 ]] ; then
+    ./${APP_NAME} --gtest_filter=*VPURegTest*SSD*myriad* &
+    pids+=" $!"
+fi
+
+if [[ "${APPS_TO_RUN}" -ge 2 ]] ; then
+    ./${APP_NAME} --gtest_filter=*VPURegTest*VGG*myriad* &
+    pids+=" $!"
+fi
+
+if [[ "${APPS_TO_RUN}" -ge 3 ]] ; then
+    ./${APP_NAME} --gtest_filter=*VPURegTest*VGG*myriad* &
+    pids+=" $!"
+fi
+
+if [[ "${APPS_TO_RUN}" -ge 4 ]] ; then
+    # For more than 4: launch additional YOLO tasks (tasks 4..APPS_TO_RUN) for multi-device testing
+    for (( VAR = 4; VAR <= ${APPS_TO_RUN}; ++VAR )); do
+        ./${APP_NAME} --gtest_filter=*VPURegTest*YOLO*myriad* &
+        pids+=" $!"
+    done
+fi
+
+
+# Wait for all processes to finish
+# sts accumulates one flag per task: 1 = exited successfully, 0 = failed.
+sts=""
+for p in ${pids}; do
+    if wait ${p}; then
+        sts+=" 1"
+    else
+        sts+=" 0"
+    fi
+    echo "--- Process $p finished"
+done
+
+# Print one PASSED/FAILED line per task and derive the overall exit code.
+idx=0
+exit_code=0
+for s in ${sts}; do
+    if [[ ${s} -eq 1 ]]; then
+        echo "Task $idx PASSED"
+    else
+        echo "Task $idx FAILED"
+        exit_code=1
+    fi
+    ((idx+=1))
+done
+
+exit ${exit_code}
add_subdirectory(hetero_plugin)
+add_subdirectory(multi_device)
+
add_subdirectory(transformations)
add_subdirectory(inference_engine)
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/op/fused/gelu.hpp>
#include <generic_ie.hpp>
+#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
#include <transformations/convert_opset2_to_opset1/convert_opset2_to_opset1.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp>
#include "convert_function_to_cnn_network.hpp"
#undef min
::ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
// Note: instead of running all Conversion Transformations you can make up your own transformation pipeline
+ ngraph::pass::CommonOptimizations().run_on_function(nGraphFunc);
+ ngraph::pass::ConvertOpSet3ToOpSet2(transformations_callback).run_on_function(nGraphFunc);
ngraph::pass::ConvertOpSet2ToOpSet1(transformations_callback).run_on_function(nGraphFunc);
ngraph::pass::ConvertOpSet1ToLegacy(transformations_callback).run_on_function(nGraphFunc);
clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, network);
}
};
-ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::ICore * /*core*/, const InferenceEngine::ICNNNetwork &network,
+ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network,
const std::map<std::string, std::string> &config) {
// verification of supported input
InferenceEngine::InputsDataMap _networkInputs;
return std::make_shared<CLDNNExecNetwork>(*CloneNetwork(network), context, conf);
}
-ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::ICore * /*core*/, const InferenceEngine::ICNNNetwork &network,
- RemoteContext::Ptr context,
- const std::map<std::string, std::string> &config) {
+ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network,
+ RemoteContext::Ptr context,
+ const std::map<std::string, std::string> &config) {
InferenceEngine::InputsDataMap _networkInputs;
network.getInputsInfo(_networkInputs);
check_inputs(_networkInputs);
public:
clDNNEngine();
- InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::ICore * core, const InferenceEngine::ICNNNetwork &network,
+ InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network,
const std::map<std::string, std::string> &config) override;
- InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::ICore * core, const InferenceEngine::ICNNNetwork &network,
- InferenceEngine::RemoteContext::Ptr context,
- const std::map<std::string, std::string> &config) override;
+ InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network,
+ InferenceEngine::RemoteContext::Ptr context,
+ const std::map<std::string, std::string> &config) override;
void SetConfig(const std::map<std::string, std::string> &config) override;
InferenceEngine::Parameter GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override;
void GNAPluginNS::backend::AMIntelDNN::Init(void *ptr_memory,
uint32_t num_memory_bytes,
- intel_dnn_number_type_t number_type,
+ intel_dnn_number_type_t compute_precision,
float scale_factor) {
ptr_dnn_memory_ = ptr_memory;
num_bytes_dnn_memory_ = num_memory_bytes;
- number_type_ = number_type;
+ compute_precision_ = compute_precision;
input_scale_factor_ = scale_factor;
ptr_active_outputs_ = nullptr;
reinterpret_cast<void *>(reinterpret_cast<int32_t *>(comp->op.recurrent.ptr_feedbacks) + j * comp_pwl->num_columns_out);
ApplyRecurrentTransform(comp, j, ptr_feedbacks);
// PrintOutputs(i);
- ApplyPiecewiseLinearTransform(comp_pwl, number_type_, num_active_outputs, j);
+ ApplyPiecewiseLinearTransform(comp_pwl, compute_precision_, num_active_outputs, j);
}
i++; // skip next component
} else {
break;
case kDnnConvolutional1dOp:ApplyConvolutional1DTransform(comp);
break;
- case kDnnPiecewiselinearOp:ApplyPiecewiseLinearTransform(comp, number_type_, num_active_outputs);
+ case kDnnPiecewiselinearOp:ApplyPiecewiseLinearTransform(comp, compute_precision_, num_active_outputs);
break;
- case kDnnMaxPoolOp:ApplyMaxPoolTransform(comp, number_type_);
+ case kDnnMaxPoolOp:ApplyMaxPoolTransform(comp, compute_precision_);
break;
case kDnnInterleaveOp:ApplyTranspose(comp);
break;
graph << "}";
}
-void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_dnn_number_type_t number_type) {
- if ((number_type_ == kDnnFloat) && (number_type == kDnnInt)) {
+void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision) {
+ if ((compute_precision_ == kDnnFloat) && (logging_precision == kDnnInt)) {
fprintf(stderr, "Error trying to write floating point DNN as integer in GNAPluginNS::backend::AMIntelDNN::WriteDnnText().\n");
fprintf(stderr, " Please convert to integer first.\n");
throw -1;
uint32_t layer = 0;
out_file << "<intel_dnn_file>\n";
- out_file << "<number_type> " << intel_dnn_number_type_name[number_type] << "\n";
+ out_file << "<number_type> " << intel_dnn_number_type_name[logging_precision] << "\n";
out_file << "<softmax_type> " << intel_dnn_softmax_name[softmax_type] << "\n";
out_file << "<num_memory_bytes> " << std::dec << num_bytes_dnn_memory_ << "\n";
out_file << "<num_group> " << std::dec << num_group << "\n";
out_file << "<orientation_out> " << std::dec << (component[i].orientation_out == kDnnInterleavedOrientation ?
"interleaved" : "deinterleaved") << "\n";
- if ((number_type_ == kDnnInt) && (number_type == kDnnFloat)) {
+ if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << "<num_bytes_per_input> " << std::dec << sizeof(float) << "\n";
out_file << "<num_bytes_per_output> " << std::dec << sizeof(float) << "\n";
} else {
float output_scale_factor = component[i].output_scale_factor;
uint32_t num_weight_rows = (component[i].operation == kDnnDiagonalOp) ? 1 : num_rows_out;
uint32_t num_weight_columns = num_rows_in;
- if ((number_type_ == kDnnInt) && (number_type == kDnnFloat)) {
+ if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << "<num_bytes_per_weight> " << std::dec << 4 << "\n";
out_file << "<num_bytes_per_bias> " << std::dec << 4 << "\n";
} else {
out_file << "<num_bytes_per_weight> " << std::dec << num_bytes_per_weight << "\n";
out_file << "<num_bytes_per_bias> " << std::dec << num_bytes_per_bias << "\n";
}
- if ((number_type_ == kDnnInt) && (number_type == kDnnFloat)) {
+ if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << std::setprecision(12) << std::scientific << "<weight_scale_factor> " << 1.0 << "\n";
out_file << std::setprecision(12) << std::scientific << "<output_scale_factor> " << 1.0 << "\n";
} else {
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_weight_rows; row++) {
for (uint32_t col = 0; col < num_weight_columns; col++) {
- if (number_type == kDnnFloat) {
+ if (logging_precision == kDnnFloat) {
float val =
static_cast<float>(ptr_weight[row * num_weight_columns + col]) * ptr_bias[row].multiplier
/ weight_scale_factor;
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_weight_rows; row++) {
for (uint32_t col = 0; col < num_weight_columns; col++) {
- if (number_type == kDnnFloat) {
+ if (logging_precision == kDnnFloat) {
out_wfile << std::setprecision(12)
<< ptr_weight[row * num_weight_columns + col] / weight_scale_factor << " ";
} else {
}
}
#endif
- } else if (number_type_ == kDnnFloat) {
+ } else if (compute_precision_ == kDnnFloat) {
float *ptr_weight = reinterpret_cast<float *>(component[i].op.affine.ptr_weights);
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_weight_rows; row++) {
fprintf(stderr, "Unsupported weight type in WriteDnnText!\n");
throw -1;
}
- if (number_type_ == kDnnInt) {
+ if (compute_precision_ == kDnnInt) {
if (num_bytes_per_weight == 1) {
intel_compound_bias_t
*ptr_biases = reinterpret_cast<intel_compound_bias_t *>(component[i].op.affine.ptr_biases);
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_rows_out; row++) {
- out_bfile << std::setw(8) << ptr_biases[row].bias << ", ";
- out_bfile << std::setw(8) << int(ptr_biases[row].multiplier) << "\n";
+ if (logging_precision == kDnnInt) {
+ out_bfile << std::setw(8) << ptr_biases[row].bias << ", ";
+ out_bfile << std::setw(8) << int(ptr_biases[row].multiplier) << "\n";
+ } else {
+ out_bfile << std::setw(8) << ptr_biases[row].bias / output_scale_factor << "\n";
+ }
}
#endif
} else {
int32_t *ptr_biases = reinterpret_cast<int32_t *>(component[i].op.affine.ptr_biases);
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_rows_out; row++) {
- if (number_type == kDnnInt) {
+ if (logging_precision == kDnnInt) {
out_bfile << std::setw(8) << ptr_biases[row] << "\n";
} else {
out_bfile << std::setw(8) << ptr_biases[row] / output_scale_factor << "\n";
out_file << "<num_feature_maps> " << std::dec << num_feature_maps << "\n";
out_file << "<num_feature_map_rows> " << std::dec << num_feature_map_rows << "\n";
out_file << "<num_feature_map_columns> " << std::dec << num_feature_map_columns << "\n";
- if ((number_type_ == kDnnInt) && (number_type == kDnnFloat)) {
+ if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << "<num_bytes_per_weight> " << std::dec << 4 << "\n";
out_file << "<num_bytes_per_bias> " << std::dec << 4 << "\n";
} else {
out_file << "<num_bytes_per_weight> " << std::dec << num_bytes_per_weight << "\n";
out_file << "<num_bytes_per_bias> " << std::dec << num_bytes_per_bias << "\n";
}
- if ((number_type_ == kDnnInt) && (number_type == kDnnFloat)) {
+ if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << std::setprecision(12) << std::scientific << "<weight_scale_factor> " << 1.0 << "\n";
out_file << std::setprecision(12) << std::scientific << "<output_scale_factor> " << 1.0 << "\n";
} else {
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_filters; row++) {
for (uint32_t col = 0; col < num_filter_coefficients; col++) {
- if (number_type == kDnnFloat) {
+ if (logging_precision == kDnnFloat) {
float val = static_cast<float>(ptr_weight[row * num_filter_coefficients + col])
* ptr_bias[row].multiplier / weight_scale_factor;
out_wfile << std::setprecision(12) <<val << "\n";
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_filters; row++) {
for (uint32_t col = 0; col < num_filter_coefficients; col++) {
- if (number_type == kDnnFloat) {
+ if (logging_precision == kDnnFloat) {
out_wfile << std::setprecision(12)
<< ptr_weight[row * num_filter_coefficients + col] / weight_scale_factor
<< "\n";
}
}
#endif
- } else if (number_type_ == kDnnFloat) {
+ } else if (compute_precision_ == kDnnFloat) {
float *ptr_weight = reinterpret_cast<float *>(component[i].op.conv1D.ptr_filters);
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_filters; row++) {
throw -1;
}
- if (number_type_ == kDnnInt) {
- if (number_type == kDnnInt) {
+ if (compute_precision_ == kDnnInt) {
+ if (logging_precision == kDnnInt) {
if (num_bytes_per_weight == 1) {
intel_compound_bias_t
*ptr_biases = reinterpret_cast<intel_compound_bias_t *>(component[i].op.conv1D.ptr_biases);
uint32_t num_weight_rows = num_columns_out;
uint32_t num_weight_columns = num_columns_in + num_columns_out;
out_file << "<num_vector_delay> " << std::dec << num_vector_delay << "\n";
- if ((number_type_ == kDnnInt) && (number_type == kDnnFloat)) {
+ if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << "<num_bytes_per_weight> " << std::dec << 4 << "\n";
out_file << "<num_bytes_per_bias> " << std::dec << 4 << "\n";
} else {
out_file << "<num_bytes_per_weight> " << std::dec << num_bytes_per_weight << "\n";
out_file << "<num_bytes_per_bias> " << std::dec << num_bytes_per_bias << "\n";
}
- if ((number_type_ == kDnnInt) && (number_type == kDnnFloat)) {
+ if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << std::setprecision(12) << std::scientific << "<weight_scale_factor> " << 1.0 << "\n";
out_file << std::setprecision(12) << std::scientific << "<output_scale_factor> " << 1.0 << "\n";
} else {
for (uint32_t row = 0; row < num_weight_rows; row++) {
out_file << "<weight_row> ";
for (uint32_t col = 0; col < num_weight_columns; col++) {
- if (number_type == kDnnFloat) {
+ if (logging_precision == kDnnFloat) {
float val =
static_cast<float>(ptr_weight[row * num_weight_columns + col]) * ptr_bias[col].multiplier
/ weight_scale_factor;
for (uint32_t row = 0; row < num_weight_rows; row++) {
out_file << "<weight_row> ";
for (uint32_t col = 0; col < num_weight_columns; col++) {
- if (number_type == kDnnFloat) {
+ if (logging_precision == kDnnFloat) {
out_file << std::setprecision(12) << std::scientific
<< ptr_weight[row * num_weight_columns + col] / weight_scale_factor << " ";
} else {
out_file << "\n";
}
#endif
- } else if (number_type_ == kDnnFloat) {
+ } else if (compute_precision_ == kDnnFloat) {
float *ptr_weight = reinterpret_cast<float *>(component[i].op.recurrent.ptr_weights);
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_weight_rows; row++) {
fprintf(stderr, "Unsupported weight type in WriteDnnText!\n");
throw -1;
}
- if (number_type_ == kDnnInt) {
- if (number_type == kDnnInt) {
+ if (compute_precision_ == kDnnInt) {
+ if (logging_precision == kDnnInt) {
if (num_bytes_per_weight == 1) {
intel_compound_bias_t
*ptr_biases = reinterpret_cast<intel_compound_bias_t *>(component[i].op.recurrent.ptr_biases);
out_file << "<num_bytes_per_slope> " << std::dec << sizeof(int16_t) << "\n";
out_file << "<num_bytes_per_intercept> " << std::dec << sizeof(int16_t) << "\n";
out_file << "<num_bytes_per_offset> " << std::dec << sizeof(int32_t) << "\n";
- if (number_type == kDnnFloat) {
+ if (logging_precision == kDnnFloat) {
out_file << std::setprecision(12) << std::scientific << "<output_scale_factor> " << 1.0 << "\n";
out_file << "<num_segments> " << std::dec << 0 << "\n";
out_file << "<segment_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
out_file << "<num_segments> " << std::dec << num_segments << "\n";
out_file << "<segment_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< GNAPluginNS::memory::MemoryOffset(component[i].op.pwl.ptr_segments, ptr_dnn_memory_) << "\n";
- if (number_type_ == kDnnInt) {
+ if (compute_precision_ == kDnnInt) {
out_file << "<slope> ";
for (int segment = 0; segment < num_segments; segment++) {
out_file << "0x" << std::setfill('0') << std::setw(4) << std::hex
for (int j = 0; j < component[i].num_columns_out; j++) {
float floatValue = 0.f;
if (component[i].num_bytes_per_output == 4) {
- if (number_type_ == kDnnInt) {
+ if (compute_precision_ == kDnnInt) {
auto value = reinterpret_cast<int32_t *>(component[i].ptr_outputs)[k * component[i].num_columns_out+ j];
floatValue = static_cast<float>(value);
for (int j = 0; j < component[i].num_columns_in; j++) {
float floatValue = 0.f;
if (component[i].num_bytes_per_input == 4) {
- if (number_type_ == kDnnInt) {
+ if (compute_precision_ == kDnnInt) {
auto value = reinterpret_cast<int32_t *>(component[i].ptr_inputs)[k * component[i].num_columns_in + j];
floatValue = static_cast<float>(value);
} else {
ptr_priors(NULL),
ptr_dnn_memory_(NULL),
num_bytes_dnn_memory_(0),
- number_type_(kDnnNumNumberType) {
+ compute_precision_(kDnnNumNumberType) {
}
~AMIntelDNN();
void Init(void *ptr_memory,
uint32_t num_memory_bytes,
- intel_dnn_number_type_t number_type,
+ intel_dnn_number_type_t compute_precision,
float scale_factor);
void InitActiveList(uint32_t *ptr_active_list);
void WriteGraphWizModel(const char *filename);
- void WriteDnnText(const char *filename, intel_dnn_number_type_t number_type);
+ void WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision);
#if GNA_LIB_VER == 2
uint32_t num_bytes_dnn_memory_;
uint32_t *ptr_active_outputs_;
uint32_t num_active_outputs_;
- intel_dnn_number_type_t number_type_;
+ intel_dnn_number_type_t compute_precision_;
float input_scale_factor_;
uint32_t dump_write_index = 0;
InferenceEngine::RemoteContext::Ptr context) override { THROW_GNA_EXCEPTION << "Not implemented"; }
void Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &result);
void SetCore(InferenceEngine::ICore*) noexcept override {}
- const InferenceEngine::ICore* GetCore() const noexcept override {return nullptr;}
+ InferenceEngine::ICore* GetCore() const noexcept override {return nullptr;}
void Reset();
void QueryNetwork(const InferenceEngine::ICNNNetwork &network,
const std::map<std::string, std::string>& config,
}
public:
- InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::ICore * core,
+ InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(
const InferenceEngine::ICNNNetwork &network,
const std::map<std::string, std::string> &config) override {
Config updated_config(defaultConfig);
const std::unordered_map<std::string, std::function<Parameter()>> queryApiSupported = {
{METRIC_KEY(AVAILABLE_DEVICES), [this]() {return GetAvailableDevices();}},
{METRIC_KEY(SUPPORTED_CONFIG_KEYS), [this]() {return config.GetSupportedKeys();}},
+ {METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS), [this]() {
+ uint32_t nireq = 1;
+ return nireq;
+ }},
{METRIC_KEY(FULL_DEVICE_NAME), [&options, this]() {
auto availableDevices = GetAvailableDevices().as<std::vector<std::string>>();
#include <utility>
#include <memory>
#include "hetero_async_infer_request.hpp"
-#include <ie_util_internal.hpp>
#include <ie_profiling.hpp>
using namespace HeteroPlugin;
#include "hetero_async_infer_request.hpp"
#include "ie_util_internal.hpp"
#include "hetero_graph_splitter.hpp"
-#include "file_utils.h"
#include "xml_parse_utils.h"
#include <vector>
#include <array>
#include <cstdint>
-#include "details/caseless.hpp"
#include "ie_plugin_config.hpp"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
-#include "cpp_interfaces/base/ie_inference_plugin_api.hpp"
#include "hetero/hetero_plugin_config.hpp"
-#include "precision_utils.h"
#include "hetero_plugin.hpp"
#include "network_serializer.h"
HeteroExecutableNetwork::HeteroExecutableNetwork(const InferenceEngine::ICNNNetwork& network_,
const Engine::Configs& config,
- Engine* plugin):
+ Engine* heteroPlugin):
InferenceEngine::ExecutableNetworkThreadSafeDefault(
nullptr, std::make_shared<InferenceEngine::ImmediateExecutor>()),
- _plugin{plugin},
+ _heteroPlugin(heteroPlugin),
_name{network_.getName()},
_config{config} {
auto networkPtr = cloneNet(network_);
if (allEmpty) {
auto it = _config.find("TARGET_FALLBACK");
if (it != _config.end()) {
- plugin->SetAffinity(network, _config);
+ _heteroPlugin->SetAffinity(network, _config);
} else {
THROW_IE_EXCEPTION << "The 'TARGET_FALLBACK' option was not defined for heterogeneous plugin";
}
network.getOutputsInfo(externalOutputsData);
auto subgraphs = splitGraph(network, getAffinities(network));
-
sortSubgraphs(subgraphs);
-
- std::vector<NetworkDesc> descs;
- std::vector<CNNLayerPtr> tempLayers;
-
- for (auto &&subgraph : subgraphs) {
- assert(!subgraph.empty());
- auto affinity = (*subgraph.begin())->affinity;
- assert(!affinity.empty());
- _affinities.push_back(affinity);
- if (_plugin->_plugins.end() == _plugin->_plugins.find(affinity)) {
- IE_SUPPRESS_DEPRECATED_START
- _plugin->_plugins[affinity] = _plugin->GetDevicePlugin(affinity);
- IE_SUPPRESS_DEPRECATED_END
- }
- }
-
if (dumpDotFile) {
std::stringstream stream(std::stringstream::out);
stream << "hetero_subgraphs_" << network.getName() << ".dot";
networkStats = nullptr;
}
+ std::vector<NetworkDesc> descs;
+ std::vector<CNNLayerPtr> tempLayers;
for (auto &&subgraph : subgraphs) {
auto affinity = (*subgraph.begin())->affinity;
tempLayers.assign(subgraph.begin(), subgraph.end());
inp->second->getPreProcess() = it.second->getPreProcess();
}
}
+
// go over all inputs/outputs and right now
// set precision for intermediate data (not for external) to FP32
- // later on we have to add Plugin::getPreferableInputPrecision(network) and
- // Plugin::getPreferableOutputPrecision(network) and set precision based on this info
- // TODO(amalyshe) add clever selectino of precision for intermediate blobs
for (auto &&it : clonedInputs) {
if (externalInputsData.find(it.first) == externalInputsData.end()) {
it.second->setPrecision(Precision::FP32);
}));
auto cfg = _config;
- cfg[PluginConfigInternalParams::KEY_SUBNETWORK_WITH_NETWORK_INPUTS] = isInputSubnetwork
- ? CONFIG_VALUE(YES)
- : CONFIG_VALUE(NO);
- IE_SUPPRESS_DEPRECATED_START
- auto plugin = _plugin->_plugins[d._device];
- d._network = plugin._ref.LoadNetwork(d._clonedNetwork, Engine::GetSupportedConfig(plugin._config, cfg, plugin._ref));
- IE_SUPPRESS_DEPRECATED_END
- }
-
- networks = std::move(descs);
-}
+ cfg[PluginConfigInternalParams::KEY_SUBNETWORK_WITH_NETWORK_INPUTS] =
+ isInputSubnetwork ? CONFIG_VALUE(YES) : CONFIG_VALUE(NO);
-namespace {
+ auto deviceName = d._device;
+ auto metaDevices = _heteroPlugin->GetDevicePlugins(deviceName, cfg);
+ assert(metaDevices.size() == 1);
-IE_SUPPRESS_DEPRECATED_START
-IInferencePluginAPI * getInferencePluginAPIInterface(IInferencePlugin * iplugin) {
- return dynamic_cast<IInferencePluginAPI *>(iplugin);
-}
+ auto loadConfig = metaDevices[deviceName];
+ d._network = _heteroPlugin->GetCore()->LoadNetwork(d._clonedNetwork, deviceName, loadConfig);
+ }
-IInferencePluginAPI * getInferencePluginAPIInterface(InferenceEnginePluginPtr iplugin) {
- return getInferencePluginAPIInterface(static_cast<IInferencePlugin *>(iplugin.operator->()));
+ networks = std::move(descs);
}
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace
HeteroExecutableNetwork::HeteroExecutableNetwork(std::istream& heteroModel,
const std::map<std::string, std::string>& configs,
- Engine* plugin) :
- _plugin(plugin) {
+ Engine* heteroPlugin) :
+ _heteroPlugin(heteroPlugin) {
std::string heteroXmlStr;
std::getline(heteroModel, heteroXmlStr);
pugi::xml_node subnetworksNode = heteroNode.child("subnetworks");
for (auto subnetworkNode = subnetworksNode.child("subnetwork"); !subnetworkNode.empty();
subnetworkNode = subnetworkNode.next_sibling("subnetwork")) {
- auto device = GetStrAttr(subnetworkNode, "device");
- _affinities.push_back(device);
-
- if (_plugin->_plugins.end() == _plugin->_plugins.find(device)) {
- IE_SUPPRESS_DEPRECATED_START
- _plugin->_plugins[device] = _plugin->GetDevicePlugin(device);
- IE_SUPPRESS_DEPRECATED_END
- }
+ auto deviceName = GetStrAttr(subnetworkNode, "device");
- auto& plugin = _plugin->_plugins[device];
- auto supportedConfig = Engine::GetSupportedConfig(plugin._config, importedConfigs, plugin._ref);
- IE_SUPPRESS_DEPRECATED_START
- auto pluginAPI = getInferencePluginAPIInterface(plugin._ref);
- IE_SUPPRESS_DEPRECATED_END
+ auto metaDevices = _heteroPlugin->GetDevicePlugins(deviceName, importedConfigs);
+ assert(metaDevices.size() == 1);
+ auto& loadConfig = metaDevices[deviceName];
InferenceEngine::ExecutableNetwork executableNetwork;
CNNNetwork cnnnetwork;
bool loaded = false;
try {
- executableNetwork = pluginAPI->ImportNetwork(heteroModel, supportedConfig);
+ executableNetwork = _heteroPlugin->GetCore()->ImportNetwork(heteroModel, deviceName, loadConfig);
} catch(InferenceEngine::details::InferenceEngineException& ie_ex) {
if (std::string::npos != std::string{ie_ex.what()}.find(NOT_IMPLEMENTED_str)) {
// read XML content
heteroModel.read(dataBlob->buffer(), dataSize);
}
- cnnnetwork = _plugin->GetCore()->ReadNetwork(xmlString, std::move(dataBlob));
+ cnnnetwork = _heteroPlugin->GetCore()->ReadNetwork(xmlString, std::move(dataBlob));
auto inputs = cnnnetwork.getInputsInfo();
auto inputsNode = subnetworkNode.child("inputs");
for (auto inputNode = inputsNode.child("input"); !inputNode.empty(); inputNode = inputNode.next_sibling("input")) {
for (auto outputNode = outputsNode.child("output"); !outputNode.empty(); outputNode = outputNode.next_sibling("output")) {
outputs[GetStrAttr(outputNode, "name")]->setPrecision(Precision::FromStr(GetStrAttr(outputNode, "precision")));
}
- IE_SUPPRESS_DEPRECATED_START
- executableNetwork = plugin._ref.LoadNetwork(cnnnetwork, supportedConfig);
- IE_SUPPRESS_DEPRECATED_END
+ executableNetwork = _heteroPlugin->GetCore()->LoadNetwork(cnnnetwork, deviceName, loadConfig);
loaded = true;
} else {
throw;
}
descs.emplace_back(NetworkDesc{
- device,
+ deviceName,
loaded ? CNNNetwork{cloneNet(static_cast<InferenceEngine::ICNNNetwork&>(cnnnetwork))} : CNNNetwork{},
executableNetwork,
});
auto heteroInferRequest = std::dynamic_pointer_cast<HeteroInferRequest>(
CreateInferRequestImpl(_networkInputs, _networkOutputs));
heteroInferRequest->setPointerToExecutableNetworkInternal(shared_from_this());
- auto asyncTreadSafeImpl = std::make_shared<HeteroAsyncInferRequest>(heteroInferRequest, _taskExecutor, _callbackExecutor);
- asyncRequest.reset(new InferRequestBase<HeteroAsyncInferRequest>(asyncTreadSafeImpl),
+ auto asyncThreadSafeImpl = std::make_shared<HeteroAsyncInferRequest>(heteroInferRequest, _taskExecutor, _callbackExecutor);
+ asyncRequest.reset(new InferRequestBase<HeteroAsyncInferRequest>(asyncThreadSafeImpl),
[](IInferRequest *p) { p->Release(); });
- asyncTreadSafeImpl->SetPointerToPublicInterface(asyncRequest);
+ asyncThreadSafeImpl->SetPointerToPublicInterface(asyncRequest);
}
void HeteroExecutableNetwork::GetConfig(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *) const {
/**
* @brief constructor
*/
- HeteroExecutableNetwork(const InferenceEngine::ICNNNetwork& network,
+ HeteroExecutableNetwork(const InferenceEngine::ICNNNetwork& network,
const std::map<std::string, std::string>& config,
Engine* plugin);
};
std::vector<NetworkDesc> networks;
- Engine* _plugin;
+ Engine* _heteroPlugin;
std::string _name;
- std::vector<std::string> _affinities;
std::map<std::string, std::string> _config;
};
//
#include "ie_metric_helpers.hpp"
-#include "ie_plugin_dispatcher.hpp"
#include "hetero_plugin.hpp"
-#include "ie_util_internal.hpp"
#include <memory>
#include <vector>
#include <map>
#include "hetero/hetero_plugin_config.hpp"
#include <cpp_interfaces/base/ie_plugin_base.hpp>
#include "hetero_executable_network.hpp"
-#include "cpp_interfaces/base/ie_inference_plugin_api.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::PluginConfigParams;
"heteroPlugin" // plugin description message
};
-void Engine::GetVersion(const Version *&versionInfo)noexcept {
- versionInfo = &heteroPluginDescription;
-}
-
Engine::Engine() {
_pluginName = "HETERO";
- _config[InferenceEngine::PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS] = "YES";
+ _config[KEY_EXCLUSIVE_ASYNC_REQUESTS] = YES;
_config[HETERO_CONFIG_KEY(DUMP_GRAPH_DOT)] = NO;
}
-InferenceEngine::ExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl(const ICore* /*core*/,
- const InferenceEngine::ICNNNetwork& network,
+namespace {
+
+Engine::Configs mergeConfigs(Engine::Configs config, const Engine::Configs & local) {
+ for (auto && kvp : local) {
+ config[kvp.first] = kvp.second;
+ }
+ return config;
+}
+
+} // namespace
+
+InferenceEngine::ExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork& network,
const Configs& config) {
- // TODO(amalyshe) do we need here verification of input precisions?
- Configs tconfig;
- tconfig = config;
-
- // we must not override the parameter, but need to copy everything from plugin config
- for (auto && c : _config) {
- if (tconfig.find(c.first) == tconfig.end()) {
- tconfig[c.first] = c.second;
- }
+ if (GetCore() == nullptr) {
+ THROW_IE_EXCEPTION << "Please, work with HETERO device via InferencEngine::Core object";
}
- return std::make_shared<HeteroExecutableNetwork>(*cloneNet(network), tconfig, this);
+ return std::make_shared<HeteroExecutableNetwork>(*cloneNet(network), mergeConfigs(_config, config), this);
}
ExecutableNetwork Engine::ImportNetworkImpl(std::istream& heteroModel, const Configs& config) {
- Configs tconfig;
- tconfig = config;
-
- // we must not override the parameter, but need to copy everything from plugin config
- for (auto && c : _config) {
- if (tconfig.find(c.first) == tconfig.end()) {
- tconfig[c.first] = c.second;
- }
+ if (GetCore() == nullptr) {
+ THROW_IE_EXCEPTION << "Please, work with HETERO device via InferencEngine::Core object";
}
IExecutableNetwork::Ptr executableNetwork;
- // Use config provided by an user ignoring default config
executableNetwork.reset(new ExecutableNetworkBase<ExecutableNetworkInternal>(
- std::make_shared<HeteroExecutableNetwork>(heteroModel, tconfig, this)),
+ std::make_shared<HeteroExecutableNetwork>(heteroModel, mergeConfigs(_config, config), this)),
[](InferenceEngine::details::IRelease *p) {p->Release();});
return ExecutableNetwork{executableNetwork};
}
-namespace {
-
-IE_SUPPRESS_DEPRECATED_START
-
-IInferencePluginAPI * getInferencePluginAPIInterface(IInferencePlugin * iplugin) {
- return dynamic_cast<IInferencePluginAPI *>(iplugin);
-}
-
-IInferencePluginAPI * getInferencePluginAPIInterface(InferenceEnginePluginPtr iplugin) {
- return getInferencePluginAPIInterface(static_cast<IInferencePlugin *>(iplugin.operator->()));
-}
-
-IInferencePluginAPI * getInferencePluginAPIInterface(InferencePlugin plugin) {
- return getInferencePluginAPIInterface(static_cast<InferenceEnginePluginPtr>(plugin));
-}
-
-} // namespace
-
-Engine::Configs Engine::GetSupportedConfig(const Engine::Configs& globalConfig,
- const Engine::Configs& localConfig,
- const InferenceEngine::InferencePlugin& plugin) {
- auto pluginApi = getInferencePluginAPIInterface(plugin);
- std::vector<std::string> supportedConfigKeys = pluginApi->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS), {});
+Engine::Configs Engine::GetSupportedConfig(const Engine::Configs& config, const std::string & deviceName) const {
+ std::vector<std::string> supportedConfigKeys = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
Engine::Configs supportedConfig;
for (auto&& key : supportedConfigKeys) {
- auto itKey = localConfig.find(key);
- if (localConfig.end() != itKey) {
+ auto itKey = config.find(key);
+ if (config.end() != itKey) {
supportedConfig[key] = itKey->second;
- } else {
- itKey = globalConfig.find(key);
- if (globalConfig.end() != itKey) {
- supportedConfig[key] = itKey->second;
- }
}
}
return supportedConfig;
}
-Engine::PluginEntry Engine::GetDevicePlugin(const std::string& deviceWithID) const {
- InferenceEngine::InferencePlugin plugin;
- DeviceIDParser deviceParser(deviceWithID);
- std::string deviceName = deviceParser.getDeviceName();
-
- if (nullptr == _core) {
- IE_SUPPRESS_DEPRECATED_START
- // try to create plugin
- PluginDispatcher dispatcher({file_name_t()});
- plugin = dispatcher.getPluginByDevice(deviceName);
- IE_SUPPRESS_DEPRECATED_END
- } else {
- plugin = InferencePlugin{_core->GetPluginByName(deviceName)};
- }
-
- try {
- for (auto&& ext : _extensions) {
- plugin.AddExtension(ext);
+Engine::DeviceMetaInformationMap Engine::GetDevicePlugins(const std::string& targetFallback,
+ const Configs & localConfig) const {
+ auto getDeviceConfig = [&](const std::string & deviceWithID) {
+ DeviceIDParser deviceParser(deviceWithID);
+ std::string deviceName = deviceParser.getDeviceName();
+ Configs tconfig = mergeConfigs(_config, localConfig);
+
+ // set device ID if any
+ std::string deviceIDLocal = deviceParser.getDeviceID();
+ if (!deviceIDLocal.empty()) {
+ tconfig[KEY_DEVICE_ID] = deviceIDLocal;
}
- } catch (InferenceEngine::details::InferenceEngineException &) {}
-
- Configs pluginConfig = GetSupportedConfig(_config, {}, plugin);
-
- // set device ID if any
- std::string deviceIDLocal = deviceParser.getDeviceID();
- if (!deviceIDLocal.empty()) {
- pluginConfig = GetSupportedConfig(pluginConfig, { { KEY_DEVICE_ID, deviceIDLocal } }, plugin);
- }
-
- return { plugin, pluginConfig };
-}
-IE_SUPPRESS_DEPRECATED_END
+ return GetSupportedConfig(tconfig, deviceName);
+ };
-Engine::Plugins Engine::GetDevicePlugins(const std::string& targetFallback) const {
- auto devices = InferenceEngine::DeviceIDParser::getHeteroDevices(targetFallback);
- Engine::Plugins plugins = _plugins;
- for (auto&& device : devices) {
- auto itPlugin = plugins.find(device);
- if (plugins.end() == itPlugin) {
- IE_SUPPRESS_DEPRECATED_START
- plugins[device] = GetDevicePlugin(device);
- IE_SUPPRESS_DEPRECATED_END
+ auto fallbackDevices = InferenceEngine::DeviceIDParser::getHeteroDevices(targetFallback);
+ Engine::DeviceMetaInformationMap metaDevices;
+ for (auto&& deviceName : fallbackDevices) {
+ auto itPlugin = metaDevices.find(deviceName);
+ if (metaDevices.end() == itPlugin) {
+ metaDevices[deviceName] = getDeviceConfig(deviceName);
}
}
- return plugins;
-}
-
-Engine::Plugins Engine::GetDevicePlugins(const std::string& targetFallback) {
- _plugins = const_cast<const Engine*>(this)->GetDevicePlugins(targetFallback);
- return _plugins;
+ return metaDevices;
}
void Engine::SetConfig(const Configs &configs) {
for (auto&& config : configs) {
_config[config.first] = config.second;
}
-
- for (auto&& plugin : _plugins) {
- plugin.second._config = GetSupportedConfig(plugin.second._config, configs, plugin.second._ref);
- }
-}
-
-void Engine::AddExtension(InferenceEngine::IExtensionPtr extension) {
- _extensions.emplace_back(extension);
- try {
- for (auto&& plugin : _plugins) {
- IE_SUPPRESS_DEPRECATED_START
- plugin.second._ref.AddExtension(extension);
- IE_SUPPRESS_DEPRECATED_END
- }
- } catch (InferenceEngine::details::InferenceEngineException &) {}
}
HeteroLayerColorer::HeteroLayerColorer(const std::vector<std::string>& devices) {
}
void Engine::SetAffinity(InferenceEngine::ICNNNetwork &network, const Configs &config) {
- Configs tconfig = _config;
- for (auto && value : config) {
- tconfig[value.first] = value.second;
- }
-
- auto it = tconfig.find("TARGET_FALLBACK");
- if (it == tconfig.end()) {
- THROW_IE_EXCEPTION << "The 'TARGET_FALLBACK' option was not defined for heterogeneous plugin";
- }
-
- GetDevicePlugins(it->second);
QueryNetworkResult qr;
- QueryNetwork(network, tconfig, qr);
+ QueryNetwork(network, config, qr);
details::CNNNetworkIterator i(&network);
while (i != details::CNNNetworkIterator()) {
i++;
}
- if (YES == tconfig[HETERO_CONFIG_KEY(DUMP_GRAPH_DOT)]) {
+ auto dumpDot = [](const Configs & config) {
+ auto it = config.find(HETERO_CONFIG_KEY(DUMP_GRAPH_DOT));
+ return it != config.end() ? it->second == YES : false;
+ };
+
+ if (dumpDot(config) || dumpDot(_config)) {
std::unordered_set<std::string> devicesSet;
details::CNNNetworkIterator i(&network);
while (i != details::CNNNetworkIterator()) {
stream << "hetero_affinity_" << network.getName() << ".dot";
std::ofstream file(stream.str());
-
saveGraphToDot(network, file, HeteroLayerColorer{devices});
}
}
void Engine::QueryNetwork(const ICNNNetwork &network, const Configs& config, QueryNetworkResult &qr) const {
- auto it = config.find("TARGET_FALLBACK");
- if (it == config.end()) {
- it = _config.find("TARGET_FALLBACK");
-
- if (it == _config.end()) {
- THROW_IE_EXCEPTION << "The 'TARGET_FALLBACK' option was not defined for heterogeneous plugin";
- }
+ if (GetCore() == nullptr) {
+ THROW_IE_EXCEPTION << "Please, work with HETERO device via InferencEngine::Core object";
}
- Plugins plugins = GetDevicePlugins(it->second);
+ auto tconfig = mergeConfigs(_config, config);
+ auto it = tconfig.find("TARGET_FALLBACK");
+ if (it == tconfig.end()) {
+ THROW_IE_EXCEPTION << "The 'TARGET_FALLBACK' option was not defined for heterogeneous plugin";
+ }
- qr.rc = StatusCode::OK;
+ std::string fallbackDevicesStr = it->second;
+ DeviceMetaInformationMap metaDevices = GetDevicePlugins(fallbackDevicesStr, tconfig);
std::map<std::string, QueryNetworkResult> queryResults;
- // go over devices, create appropriate plugins and
- for (auto&& value : plugins) {
- auto& device = value.first;
- auto& plugin = value.second;
- QueryNetworkResult r;
- IE_SUPPRESS_DEPRECATED_START
- plugin._ref.QueryNetwork(network, GetSupportedConfig(plugin._config, config, plugin._ref), r);
- IE_SUPPRESS_DEPRECATED_END
- queryResults[device] = r;
+ // go over devices and call query network
+ for (auto&& metaDevice : metaDevices) {
+ auto& deviceName = metaDevice.first;
+ queryResults[deviceName] = GetCore()->QueryNetwork(network, deviceName, metaDevice.second);
}
// WARNING: Here is devices with user set priority
- auto falbackDevices = InferenceEngine::DeviceIDParser::getHeteroDevices(it->second);
+ auto fallbackDevices = InferenceEngine::DeviceIDParser::getHeteroDevices(fallbackDevicesStr);
details::CNNNetworkIterator i(&network);
while (i != details::CNNNetworkIterator()) {
CNNLayer::Ptr layer = *i;
- for (auto&& device : falbackDevices) {
- auto& deviceQueryResult = queryResults[device];
+ for (auto&& deviceName : fallbackDevices) {
+ auto& deviceQueryResult = queryResults[deviceName];
if (deviceQueryResult.supportedLayersMap.find(layer->name) != deviceQueryResult.supportedLayersMap.end()) {
- qr.supportedLayersMap[layer->name] = device;
+ qr.supportedLayersMap[layer->name] = deviceName;
break;
}
}
i++;
}
+
+ // set OK status
+ qr.rc = StatusCode::OK;
}
Parameter Engine::GetMetric(const std::string& name, const std::map<std::string, Parameter> & /*options*/) const {
IE_ASSERT(it != _config.end());
bool dump = it->second == YES;
return { dump };
+ } else if (name == "TARGET_FALLBACK") {
+ auto it = _config.find("TARGET_FALLBACK");
+ if (it == _config.end()) {
+ THROW_IE_EXCEPTION << "Value for TARGET_FALLBACK is not set";
+ } else {
+ return { it->second };
+ }
} else {
THROW_IE_EXCEPTION << "Unsupported config key: " << name;
}
class Engine : public InferenceEngine::InferencePluginInternal {
public:
using Configs = std::map<std::string, std::string>;
-
- struct PluginEntry {
- IE_SUPPRESS_DEPRECATED_START
- InferenceEngine::InferencePlugin _ref;
- IE_SUPPRESS_DEPRECATED_END
- Configs _config;
- };
-
- using Plugins = std::unordered_map<std::string, PluginEntry >;
-
- using Devices = std::vector<std::string>;
+ using DeviceMetaInformationMap = std::unordered_map<std::string, Configs>;
Engine();
- void GetVersion(const InferenceEngine::Version *&versionInfo) noexcept;
-
InferenceEngine::ExecutableNetworkInternal::Ptr
- LoadExeNetworkImpl(const InferenceEngine::ICore * core, const InferenceEngine::ICNNNetwork &network, const Configs &config) override;
- void SetConfig(const Configs &config) override;
-
- void SetAffinity(InferenceEngine::ICNNNetwork& network, const Configs &config);
+ LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const Configs &config) override;
- void AddExtension(InferenceEngine::IExtensionPtr extension)override;
+ void SetConfig(const Configs &config) override;
void QueryNetwork(const InferenceEngine::ICNNNetwork &network,
const Configs& config, InferenceEngine::QueryNetworkResult &res) const override;
- InferenceEngine::Parameter GetMetric(const std::string& name,
- const std::map<std::string, InferenceEngine::Parameter> & options) const override;
-
- InferenceEngine::Parameter GetConfig(const std::string& name,
- const std::map<std::string, InferenceEngine::Parameter> & options) const override;
-
- IE_SUPPRESS_DEPRECATED_START
-
- PluginEntry GetDevicePlugin(const std::string& device) const;
+ InferenceEngine::Parameter GetMetric(const std::string& name, const std::map<std::string,
+ InferenceEngine::Parameter> & options) const override;
- static Configs GetSupportedConfig(const Configs& globalConfig, const Configs& localConfig, const InferenceEngine::InferencePlugin& plugin);
+ InferenceEngine::Parameter GetConfig(const std::string& name, const std::map<std::string,
+ InferenceEngine::Parameter> & options) const override;
- IE_SUPPRESS_DEPRECATED_END
+ ExecutableNetwork ImportNetworkImpl(std::istream& heteroModel, const Configs& config) override;
- Plugins GetDevicePlugins(const std::string& targetFallback);
- Plugins GetDevicePlugins(const std::string& targetFallback) const;
+ void SetAffinity(InferenceEngine::ICNNNetwork& network, const Configs &config);
- ExecutableNetwork ImportNetworkImpl(std::istream& heteroModel, const Configs& config) override;
+ DeviceMetaInformationMap GetDevicePlugins(const std::string& targetFallback,
+ const Configs & localConfig) const;
- Plugins _plugins;
- std::vector<InferenceEngine::IExtensionPtr> _extensions;
+private:
+ Configs GetSupportedConfig(const Configs& config, const std::string & deviceName) const;
};
struct HeteroLayerColorer {
target_compile_definitions(${TARGET_NAME}_obj PRIVATE $<TARGET_PROPERTY:ittnotify,INTERFACE_COMPILE_DEFINITIONS>)
endif()
-if(ENABLE_IR_READER)
- target_compile_definitions(${TARGET_NAME}_obj PRIVATE ENABLE_IR_READER)
-endif()
-
target_include_directories(${TARGET_NAME}_obj PRIVATE $<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES>)
if(ENABLE_MKL_DNN)
target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_INFERENCE_ENGINE_API)
ie_register_plugins(MAIN_TARGET ${TARGET_NAME}
- POSSIBLE_PLUGINS HeteroPlugin clDNNPlugin GNAPlugin MKLDNNPlugin myriadPlugin)
+ POSSIBLE_PLUGINS MultiDevicePlugin HeteroPlugin clDNNPlugin GNAPlugin MKLDNNPlugin myriadPlugin)
# Static library used for unit tests which are always built
#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
#include <transformations/convert_opset2_to_opset1/convert_opset2_to_opset1.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp>
#include <transformations/convert_opset1_to_legacy/convert_one_hot_to_one_hot_ie.hpp>
#include "ngraph_ops/eltwise.hpp"
// WA: for cnnNetwork ngraph constructor
CNNNetwork::CNNNetwork(const std::shared_ptr<const ngraph::Function>& graph) {
+ if (graph == nullptr) {
+ THROW_IE_EXCEPTION << "CNNNetwork was not initialized: 'graph' object is empty";
+ }
+
// Copy nGraph function
network = std::make_shared<CNNNetworkNGraphImpl>(copyFunction(graph, false, {}));
actual = network.get();
keep_input_info(*this, ptr);
}
for (auto& output : _outputData) {
- // Convert precision into native format. Be consistent with possible convertation to CNNNetwork later.
- if (output.second->getPrecision() != Precision::FP32 &&
+ // Convert precision into native format. Be consistent with possible conversion to CNNNetwork later.
+ if (output.second->getPrecision() == Precision::I64) {
+ output.second->setPrecision(Precision::I32);
+ } else if (output.second->getPrecision() != Precision::FP32 &&
output.second->getPrecision() != Precision::I32) {
output.second->setPrecision(Precision::FP32);
}
::ngraph::op::GenericIE::DisableReshape noReshape(graph);
::ngraph::pass::CommonOptimizations().run_on_function(graph);
+ ::ngraph::pass::ConvertOpSet3ToOpSet2().run_on_function(graph);
::ngraph::pass::ConvertOpSet2ToOpSet1().run_on_function(graph);
::ngraph::pass::ConvertOpSet1ToLegacy().run_on_function(graph);
network = InferenceEngine::details::convertFunctionToICNNNetwork(graph, *this);
::ngraph::op::GenericIE::DisableReshape noReshape(graph);
::ngraph::pass::CommonOptimizations().run_on_function(graph);
+ ::ngraph::pass::ConvertOpSet3ToOpSet2().run_on_function(graph);
::ngraph::pass::ConvertOpSet2ToOpSet1().run_on_function(graph);
::ngraph::pass::ConvertOpSet1ToLegacy().run_on_function(graph);
cnnNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(graph, *this);
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#ifdef _WIN32
-#define _WINSOCKAPI_
-#include <windows.h>
-
-BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) {
- switch (ul_reason_for_call) {
- case DLL_PROCESS_ATTACH:
- case DLL_THREAD_ATTACH:
- case DLL_THREAD_DETACH:
- case DLL_PROCESS_DETACH:
- break;
- }
- return TRUE;
-}
-
-#endif
#include <string>
#include <utility>
#include <vector>
+#include <mutex>
#include <ngraph/opsets/opset.hpp>
#include "cpp/ie_cnn_net_reader.h"
return getInferencePluginAPIInterface(static_cast<InferenceEnginePluginPtr>(plugin));
}
+template <typename T>
+struct Parsed {
+ std::string _deviceName;
+ std::map<std::string, T> _config;
+};
+
+template <typename T = Parameter>
+Parsed<T> parseDeviceNameIntoConfig(const std::string& deviceName, const std::map<std::string, T>& config = {}) {
+ auto config_ = config;
+ auto deviceName_ = deviceName;
+ if (deviceName_.find("HETERO:") == 0) {
+ deviceName_ = "HETERO";
+ config_["TARGET_FALLBACK"] = deviceName.substr(7);
+ } else if (deviceName_.find("MULTI:") == 0) {
+ deviceName_ = "MULTI";
+ config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = deviceName.substr(6);
+ } else {
+ DeviceIDParser parser(deviceName_);
+ deviceName_ = parser.getDeviceName();
+ std::string deviceIDLocal = parser.getDeviceID();
+
+ if (!deviceIDLocal.empty()) {
+ config_[KEY_DEVICE_ID] = deviceIDLocal;
+ }
+ }
+ return {deviceName_, config_};
+}
+
+Parameter copyParameterValue(const Parameter & value) {
+ if (value.is<bool>()) {
+ return { value.as<bool>() };
+ } else if (value.is<int>()) {
+ return { value.as<int>() };
+ } else if (value.is<unsigned int>()) {
+ return { value.as<unsigned int>() };
+ } else if (value.is<float>()) {
+ return { value.as<float>() };
+ } else if (value.is<std::string>()) {
+ return { value.as<std::string>() };
+ } else if (value.is<std::vector<std::string> >()) {
+ return { value.as<std::vector<std::string> >() };
+ } else if (value.is<std::vector<int> >()) {
+ return { value.as<std::vector<int> >() };
+ } else if (value.is<std::vector<float> >()) {
+ return { value.as<std::vector<float> >() };
+ } else if (value.is<std::vector<unsigned int> >()) {
+ return { value.as<std::vector<unsigned int> >() };
+ } else if (value.is<std::tuple<unsigned int, unsigned int, unsigned int> >()) {
+ return { value.as<std::tuple<unsigned int, unsigned int, unsigned int> >() };
+ } else if (value.is<std::tuple<unsigned int, unsigned int> >()) {
+ return { value.as<std::tuple<unsigned int, unsigned int> >() };
+ }
+
+ return std::move(value);
+}
+
} // namespace
CNNNetReaderPtr CreateCNNNetReaderPtr() noexcept {
};
/**
- * Hold original blob in order to avoid situations when original blob is allocated on stack
+ * @brief Holds original blob in order to avoid situations
+ * when original blob is allocated on stack
*/
class WeightsHolderBlob : public TBlob<uint8_t> {
Blob::CPtr originBlob;
std::vector<IExtensionPtr> extensions;
std::map<std::string, PluginDescriptor> pluginRegistry;
+ mutable std::mutex pluginsMutex; // to lock parallel access to pluginRegistry and plugins
public:
Impl();
/**
* @brief Register plugins for devices which are located in .xml configuration file. The function supports UNICODE path
- * @param xmlConfigFile - an .xml configuraion with device / plugin information
+     * @param xmlConfigFile An .xml configuration with device / plugin information
*/
void RegisterPluginsInRegistry(const std::string& xmlConfigFile) {
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+
auto parse_result = ParseXml(xmlConfigFile.c_str());
if (!parse_result.error_msg.empty()) {
THROW_IE_EXCEPTION << parse_result.error_msg;
StatusCode rt = cnnReader->ReadNetwork(modelPath.c_str(), &desc);
if (rt != OK) THROW_IE_EXCEPTION << desc.msg;
if (cnnReader->getVersion(&desc) >= 10) {
- cnnReader->addExtensions(getExtensions());
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+ cnnReader->addExtensions(GetExtensions());
}
std::string bPath = binPath;
if (bPath.empty()) {
StatusCode rt = cnnReader->ReadNetwork(model.data(), model.length(), &desc);
if (rt != OK) THROW_IE_EXCEPTION << desc.msg;
if (cnnReader->getVersion(&desc) >= 10) {
- cnnReader->addExtensions(getExtensions());
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+ cnnReader->addExtensions(GetExtensions());
}
TBlob<uint8_t>::Ptr weights_ptr;
if (weights) {
return CNNNetwork(cnnReader);
}
+ ExecutableNetwork LoadNetwork(const CNNNetwork& network, const std::string& deviceName,
+ const std::map<std::string, std::string>& config) override {
+ IE_PROFILING_AUTO_SCOPE(Core::LoadNetwork)
+ auto parsed = parseDeviceNameIntoConfig(deviceName, config);
+ IE_SUPPRESS_DEPRECATED_START
+ return GetCPPPluginByName(parsed._deviceName).LoadNetwork(network, parsed._config);
+ IE_SUPPRESS_DEPRECATED_END
+ }
+
IE_SUPPRESS_DEPRECATED_START
+ ExecutableNetwork ImportNetwork(std::istream& networkModel, const std::string& deviceName,
+ const std::map<std::string, std::string>& config) override {
+ auto parsed = parseDeviceNameIntoConfig(deviceName, config);
+
+ if (parsed._deviceName.empty()) {
+ ExportMagic magic = {};
+ auto currentPos = networkModel.tellg();
+ networkModel.read(magic.data(), magic.size());
+ auto exportedWithName = (exportMagic == magic);
+ if (exportedWithName) {
+ std::getline(networkModel, parsed._deviceName);
+ }
+ networkModel.seekg(currentPos, networkModel.beg);
+ }
+
+ auto cppPlugin = GetCPPPluginByName(parsed._deviceName);
+ auto pluginAPIInterface = getInferencePluginAPIInterface(cppPlugin);
+ if (pluginAPIInterface == nullptr) {
+ THROW_IE_EXCEPTION << parsed._deviceName << " does not implement the ImportNetwork method";
+ }
+
+ return pluginAPIInterface->ImportNetwork(networkModel, parsed._config);
+ }
+
+ QueryNetworkResult QueryNetwork(const ICNNNetwork& network, const std::string& deviceName,
+ const std::map<std::string, std::string>& config) const override {
+ QueryNetworkResult res;
+ auto parsed = parseDeviceNameIntoConfig(deviceName, config);
+ IE_SUPPRESS_DEPRECATED_START
+ GetCPPPluginByName(parsed._deviceName).QueryNetwork(network, parsed._config, res);
+ IE_SUPPRESS_DEPRECATED_END
+ return res;
+ }
+
+ Parameter GetMetric(const std::string& deviceName, const std::string& name) const override {
+ // HETERO case
+ {
+ if (deviceName.find("HETERO:") == 0) {
+ THROW_IE_EXCEPTION
+ << "You can get specific metrics with the GetMetric only for the HETERO itself (without devices). "
+ "To get individual devices's metrics call GetMetric for each device separately";
+ }
+ }
+
+ // MULTI case
+ {
+ if (deviceName.find("MULTI:") == 0) {
+ THROW_IE_EXCEPTION
+ << "You can get specific metrics with the GetMetric only for the MULTI itself (without devices). "
+ "To get individual devices's metrics call GetMetric for each device separately";
+ }
+ }
+
+ auto parsed = parseDeviceNameIntoConfig(deviceName);
+ IE_SUPPRESS_DEPRECATED_START
+ InferencePlugin cppPlugin = GetCPPPluginByName(parsed._deviceName);
+ auto pluginAPIInterface = getInferencePluginAPIInterface(cppPlugin);
+ IE_SUPPRESS_DEPRECATED_END
+
+ if (pluginAPIInterface == nullptr) {
+ THROW_IE_EXCEPTION << parsed._deviceName << " does not implement the GetMetric method";
+ }
+
+ // we need to return a copy of Parameter object which is created on Core side,
+ // not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread
+ // TODO: remove this WA after *-31417 is resolved
+ return copyParameterValue(pluginAPIInterface->GetMetric(name, parsed._config));
+ }
+
/**
+ * @deprecated Use ICore::LoadNetwork, ICore::QueryNetwork, ICore::GetMetric instead
* @brief Returns reference to plugin by a device name
- * @param deviceName - a name of device
+ * @param deviceName A name of device
* @return Reference to a plugin
*/
InferenceEnginePluginPtr GetPluginByName(const std::string& deviceName) const override {
}
/**
+ * @deprecated
* @brief Returns reference to CPP plugin wrapper by a device name
- * @param deviceName - a name of device
+ * @param deviceName A name of device
* @return Reference to a CPP plugin wrapper
*/
InferencePlugin GetCPPPluginByName(const std::string& deviceName) const {
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+
IE_SUPPRESS_DEPRECATED_START
auto it = pluginRegistry.find(deviceName);
IE_SUPPRESS_DEPRECATED_END
/**
- * @brief Unregisters plugin for specified device
- * @param deviceName - a name of device
+     * @brief Unloads plugin for specified device, but its meta-data remains in the plugin registry
+ * @param deviceName A name of device
*/
- void UnregisterPluginByName(const std::string& deviceName) {
+ void UnloadPluginByName(const std::string& deviceName) {
+ std::lock_guard<std::mutex> lock(pluginsMutex);
auto it = plugins.find(deviceName);
if (it == plugins.end()) {
THROW_IE_EXCEPTION << "Device with \"" << deviceName << "\" name is not registered in the InferenceEngine";
}
/**
- * @brief Registers plugin in registry for specified device
- * @param deviceName - a name of device
+ * @brief Registers plugin meta-data in registry for specified device
+ * @param deviceName A name of device
*/
void RegisterPluginByName(const std::string& pluginName, const std::string& deviceName) {
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+
auto it = pluginRegistry.find(deviceName);
if (it != pluginRegistry.end()) {
THROW_IE_EXCEPTION << "Device with \"" << deviceName << "\" is already registered in the InferenceEngine";
pluginRegistry[deviceName] = desc;
}
+ /**
+     * @brief Provides a list of plugin names in registry; physically such plugins may not be created
+ * @return A list of plugin names
+ */
std::vector<std::string> GetListOfDevicesInRegistry() const {
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+
std::vector<std::string> listOfDevices;
for (auto&& pluginDesc : pluginRegistry) {
listOfDevices.push_back(pluginDesc.first);
return listOfDevices;
}
+ /**
+ * @brief Sets config values for a plugin or set of plugins
+ * @param deviceName A device name to set config to
+ * If empty, config is set for all the plugins / plugin's meta-data
+ */
void SetConfigForPlugins(const std::map<std::string, std::string>& config, const std::string& deviceName) {
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+
// set config for plugins in registry
bool configIsSet = false;
for (auto& desc : pluginRegistry) {
}
}
- void addExtension(const IExtensionPtr& extension) {
+ /**
+ * @brief Registers the extension in a Core object
+ * Such extensions can be used for both CNNNetwork readers and device plugins
+ */
+ void AddExtension(const IExtensionPtr& extension) {
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+
std::map<std::string, ngraph::OpSet> opsets = extension->getOpSets();
for (const auto& it : opsets) {
if (opsetNames.find(it.first) != opsetNames.end())
opsetNames.insert(it.first);
}
+ // add extensions for already created plugins
for (auto& plugin : plugins) {
IE_SUPPRESS_DEPRECATED_START
try {
extensions.emplace_back(extension);
}
- const std::vector<IExtensionPtr>& getExtensions() const {
+ /**
+ * @brief Provides a list of extensions
+ * @return A list of registered extensions
+ */
+ const std::vector<IExtensionPtr>& GetExtensions() const {
return extensions;
}
};
std::string deviceNameLocal = parser.getDeviceName();
IE_SUPPRESS_DEPRECATED_START
- const Version* version = _impl->GetCPPPluginByName(deviceNameLocal).GetVersion();
+ InferenceEngine::InferencePlugin cppPlugin = _impl->GetCPPPluginByName(deviceNameLocal);
+ const Version * version = cppPlugin.GetVersion();
IE_SUPPRESS_DEPRECATED_END
versions[deviceNameLocal] = *version;
}
}
IE_SUPPRESS_DEPRECATED_END
-namespace {
-template <typename T>
-struct Parsed {
- std::string _deviceName;
- std::map<std::string, T> _config;
-};
-
-template <typename T = Parameter>
-Parsed<T> parseDeviceNameIntoConfig(const std::string& deviceName, const std::map<std::string, T>& config = {}) {
- auto config_ = config;
- auto deviceName_ = deviceName;
- if (deviceName_.find("HETERO:") == 0) {
- deviceName_ = "HETERO";
- config_["TARGET_FALLBACK"] = deviceName.substr(7);
- } else if (deviceName_.find("MULTI:") == 0) {
- deviceName_ = "MULTI";
- config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = deviceName.substr(6);
- } else {
- DeviceIDParser parser(deviceName_);
- deviceName_ = parser.getDeviceName();
- std::string deviceIDLocal = parser.getDeviceID();
-
- if (!deviceIDLocal.empty()) {
- config_[KEY_DEVICE_ID] = deviceIDLocal;
- }
- }
- return {deviceName_, config_};
-}
-} // namespace
-
CNNNetwork Core::ReadNetwork(const std::string& modelPath, const std::string& binPath) const {
return _impl->ReadNetwork(modelPath, binPath);
}
return _impl->ReadNetwork(model, weights);
}
-ExecutableNetwork Core::LoadNetwork(const CNNNetwork network, const std::string& deviceName,
+ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network, const std::string& deviceName,
const std::map<std::string, std::string>& config) {
- IE_PROFILING_AUTO_SCOPE(Core::LoadNetwork)
- auto parsed = parseDeviceNameIntoConfig(deviceName, config);
- IE_SUPPRESS_DEPRECATED_START
- return _impl->GetCPPPluginByName(parsed._deviceName).LoadNetwork(network, parsed._config);
- IE_SUPPRESS_DEPRECATED_END
+ return _impl->LoadNetwork(network, deviceName, config);
}
void Core::AddExtension(const IExtensionPtr& extension) {
- _impl->addExtension(extension);
+ _impl->AddExtension(extension);
}
-ExecutableNetwork Core::LoadNetwork(const CNNNetwork network, RemoteContext::Ptr context,
+ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network, RemoteContext::Ptr context,
const std::map<std::string, std::string>& config) {
IE_PROFILING_AUTO_SCOPE(Core::LoadNetwork)
std::map<std::string, std::string> config_ = config;
std::string deviceName = device.getDeviceName();
IE_SUPPRESS_DEPRECATED_START
- auto pluginAPIInterface = getInferencePluginAPIInterface(_impl->GetCPPPluginByName(deviceName));
+ auto cppPlugin = _impl->GetCPPPluginByName(deviceName);
+ auto pluginAPIInterface = getInferencePluginAPIInterface(cppPlugin);
if (pluginAPIInterface == nullptr) {
THROW_IE_EXCEPTION << deviceName << " does not implement the LoadNetwork method";
std::string deviceName = device.getDeviceName();
IE_SUPPRESS_DEPRECATED_START
- auto pluginAPIInterface = getInferencePluginAPIInterface(_impl->GetCPPPluginByName(deviceName));
+ auto cppPlugin = _impl->GetCPPPluginByName(deviceName);
+ auto pluginAPIInterface = getInferencePluginAPIInterface(cppPlugin);
if (pluginAPIInterface == nullptr) {
THROW_IE_EXCEPTION << deviceName << " does not implement the CreateContext method";
std::string deviceName = device.getDeviceName();
IE_SUPPRESS_DEPRECATED_START
- auto pluginAPIInterface = getInferencePluginAPIInterface(_impl->GetCPPPluginByName(deviceName));
+ auto cppPlugin = _impl->GetCPPPluginByName(deviceName);
+ auto pluginAPIInterface = getInferencePluginAPIInterface(cppPlugin);
if (pluginAPIInterface == nullptr) {
THROW_IE_EXCEPTION << deviceName << " does not implement the CreateContext method";
<< "MULTI device does not support extensions. Please, set extensions directly to fallback devices";
}
- DeviceIDParser parser(deviceName_);
- std::string deviceName = parser.getDeviceName();
-
- IE_SUPPRESS_DEPRECATED_START
- _impl->GetCPPPluginByName(deviceName).AddExtension(extension);
- _impl->addExtension(extension);
- IE_SUPPRESS_DEPRECATED_END
+ _impl->AddExtension(extension);
}
ExecutableNetwork Core::ImportNetwork(const std::string& modelFileName, const std::string& deviceName,
IE_SUPPRESS_DEPRECATED_END
}
-IE_SUPPRESS_DEPRECATED_START
-
ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, const std::string& deviceName,
const std::map<std::string, std::string>& config) {
- auto parsed = parseDeviceNameIntoConfig(deviceName, config);
-
- if (parsed._deviceName.empty()) {
- ExportMagic magic = {};
- networkModel.read(magic.data(), magic.size());
- auto exportedWithName = (exportMagic == magic);
- if (exportedWithName) {
- std::getline(networkModel, parsed._deviceName);
- }
- networkModel.seekg(0, networkModel.beg);
- }
-
- auto pluginAPIInterface = getInferencePluginAPIInterface(_impl->GetCPPPluginByName(parsed._deviceName));
- if (pluginAPIInterface == nullptr) {
- THROW_IE_EXCEPTION << parsed._deviceName << " does not implement the ImportNetwork method";
- }
-
- return pluginAPIInterface->ImportNetwork(networkModel, parsed._config);
+ return _impl->ImportNetwork(networkModel, deviceName, config);
}
-IE_SUPPRESS_DEPRECATED_END
-
ExecutableNetwork Core::ImportNetwork(std::istream& networkModel,
const RemoteContext::Ptr& context,
const std::map<std::string, std::string>& config) {
auto parsed = parseDeviceNameIntoConfig(deviceName, config);
IE_SUPPRESS_DEPRECATED_START
- auto pluginAPIInterface = getInferencePluginAPIInterface(_impl->GetCPPPluginByName(parsed._deviceName));
+ auto cppPlugin = _impl->GetCPPPluginByName(deviceName);
+ auto pluginAPIInterface = getInferencePluginAPIInterface(cppPlugin);
if (pluginAPIInterface == nullptr) {
THROW_IE_EXCEPTION << deviceName << " does not implement the ImportNetwork method";
QueryNetworkResult Core::QueryNetwork(const ICNNNetwork& network, const std::string& deviceName,
const std::map<std::string, std::string>& config) const {
- QueryNetworkResult res;
- auto parsed = parseDeviceNameIntoConfig(deviceName, config);
- IE_SUPPRESS_DEPRECATED_START
- _impl->GetCPPPluginByName(parsed._deviceName).QueryNetwork(network, parsed._config, res);
- IE_SUPPRESS_DEPRECATED_END
- return res;
+ return _impl->QueryNetwork(network, deviceName, config);
}
void Core::SetConfig(const std::map<std::string, std::string>& config, const std::string& deviceName) {
auto parsed = parseDeviceNameIntoConfig(deviceName);
IE_SUPPRESS_DEPRECATED_START
- auto pluginAPIInterface = getInferencePluginAPIInterface(_impl->GetCPPPluginByName(parsed._deviceName));
+ auto cppPlugin = _impl->GetCPPPluginByName(parsed._deviceName);
+ auto pluginAPIInterface = getInferencePluginAPIInterface(cppPlugin);
IE_SUPPRESS_DEPRECATED_END
+
if (pluginAPIInterface == nullptr) {
THROW_IE_EXCEPTION << parsed._deviceName << " does not implement the GetConfig method";
}
- return pluginAPIInterface->GetConfig(name, parsed._config);
+
+ // we need to return a copy of Parameter object which is created on Core side,
+ // not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread
+ // TODO: remove this WA after *-31417 is resolved
+ return copyParameterValue(pluginAPIInterface->GetConfig(name, parsed._config));
}
Parameter Core::GetMetric(const std::string& deviceName, const std::string& name) const {
- // HETERO case
- {
- if (deviceName.find("HETERO:") == 0) {
- THROW_IE_EXCEPTION
- << "You can get specific metrics with the GetMetric only for the HETERO itself (without devices). "
- "To get individual devices's metrics call GetMetric for each device separately";
- }
- }
-
- // MULTI case
- {
- if (deviceName.find("MULTI:") == 0) {
- THROW_IE_EXCEPTION
- << "You can get specific metrics with the GetMetric only for the MULTI itself (without devices). "
- "To get individual devices's metrics call GetMetric for each device separately";
- }
- }
-
- auto parsed = parseDeviceNameIntoConfig(deviceName);
- IE_SUPPRESS_DEPRECATED_START
- auto pluginAPIInterface = getInferencePluginAPIInterface(_impl->GetCPPPluginByName(parsed._deviceName));
- IE_SUPPRESS_DEPRECATED_END
- if (pluginAPIInterface == nullptr) {
- THROW_IE_EXCEPTION << parsed._deviceName << " does not implement the GetMetric method";
- }
-
- return pluginAPIInterface->GetMetric(name, parsed._config);
+ return _impl->GetMetric(deviceName, name);
}
std::vector<std::string> Core::GetAvailableDevices() const {
std::string propertyName = METRIC_KEY(AVAILABLE_DEVICES);
for (auto&& deviceName : _impl->GetListOfDevicesInRegistry()) {
- Parameter p;
std::vector<std::string> devicesIDs;
+ IE_SUPPRESS_DEPRECATED_START
try {
- p = GetMetric(deviceName, propertyName);
+ Parameter p = GetMetric(deviceName, propertyName);
devicesIDs = p.as<std::vector<std::string>>();
} catch (details::InferenceEngineException&) {
// plugin is not created by e.g. invalid env
THROW_IE_EXCEPTION << "Unknown exception is thrown while trying to create the " << deviceName
<< " device and call GetMetric";
}
+ IE_SUPPRESS_DEPRECATED_END
if (devicesIDs.size() > 1) {
for (auto&& deviceID : devicesIDs) {
DeviceIDParser parser(deviceName_);
std::string deviceName = parser.getDeviceName();
- _impl->UnregisterPluginByName(deviceName);
+ _impl->UnloadPluginByName(deviceName);
}
} // namespace InferenceEngine
#include "threading/ie_cpu_streams_executor.hpp"
namespace InferenceEngine {
+struct CPUStreamsExecutor::Impl {
+ struct Stream {
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
-struct PinningObserver: public tbb::task_scheduler_observer {
- CpuSet& _mask;
- int _ncpus = 0;
- int _streamId = 0;
- int _threadsPerStream = 0;
- int _threadBindingStep = 0;
- int _threadBindingOffset = 0;
-
- PinningObserver(tbb::task_arena& arena,
- CpuSet& mask,
- int ncpus,
- const int streamId,
- const int threadsPerStream,
- const int threadBindingStep,
- const int threadBindingOffset) :
- tbb::task_scheduler_observer(arena),
- _mask(mask),
- _ncpus(ncpus),
- _streamId(streamId),
- _threadsPerStream(threadsPerStream),
- _threadBindingStep(threadBindingStep),
- _threadBindingOffset(threadBindingOffset) {
- observe(true);
- }
-
- void on_scheduler_entry(bool) override {
- int threadIdx = tbb::task_arena::current_thread_index();
- int thrIdx = _streamId * _threadsPerStream + threadIdx + _threadBindingOffset;
- // pin thread to the vacant slot
- PinThreadToVacantCore(thrIdx, _threadBindingStep, _ncpus, _mask);
- }
-
- void on_scheduler_exit(bool) override {
- // reset the thread's mask (to the original process mask)
- PinCurrentThreadByMask(_ncpus, _mask);
- }
-
- ~PinningObserver() {
- observe(false);
- }
-};
-#endif // IE_THREAD != IE_THREAD_TBB
-
-struct Stream {
- int _streamId = 0;
- int _numaNodeId = 0;
-#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
- std::unique_ptr<tbb::task_arena> _taskArena;
- std::unique_ptr<PinningObserver> _pinningObserver;
+ struct Observer: public tbb::task_scheduler_observer {
+ CpuSet _mask;
+ int _ncpus = 0;
+ int _threadBindingStep = 0;
+ int _offset = 0;
+ Observer(tbb::task_arena& arena,
+ CpuSet mask,
+ int ncpus,
+ const int streamId,
+ const int threadsPerStream,
+ const int threadBindingStep,
+ const int threadBindingOffset) :
+ tbb::task_scheduler_observer(arena),
+ _mask{std::move(mask)},
+ _ncpus(ncpus),
+ _threadBindingStep(threadBindingStep),
+ _offset{streamId * threadsPerStream + threadBindingOffset} {
+ }
+ void on_scheduler_entry(bool) override {
+ PinThreadToVacantCore(_offset + tbb::task_arena::current_thread_index(), _threadBindingStep, _ncpus, _mask);
+ }
+ void on_scheduler_exit(bool) override {
+ PinCurrentThreadByMask(_ncpus, _mask);
+ }
+ ~Observer() override = default;
+ };
#endif
-};
-
-struct CPUStreamsExecutor::Impl {
- std::string _name;
- std::vector<std::thread> _threads;
- std::mutex _mutex;
- std::condition_variable _queueCondVar;
- std::queue<Task> _taskQueue;
- bool _isStopped = false;
- int _ncpus = 0;
- CpuSet _processMask;
- ThreadLocal<Stream*> _localStream;
-};
-
-int CPUStreamsExecutor::GetStreamId() {
- auto stream = _impl->_localStream.local();
- if (nullptr == stream) THROW_IE_EXCEPTION << "Not in the stream thread";
- return stream->_streamId;
-}
-
-int CPUStreamsExecutor::GetNumaNodeId() {
- auto stream = _impl->_localStream.local();
- if (nullptr == stream) THROW_IE_EXCEPTION << "Not in the stream thread";
- return stream->_numaNodeId;
-}
-
-CPUStreamsExecutor::CPUStreamsExecutor(const IStreamsExecutor::Config& config) :
- _impl{new Impl} {
- IE_ASSERT(config._streams > 0);
- _impl->_name = config._name;
- auto numaNodes = getAvailableNUMANodes();
- IE_ASSERT(!numaNodes.empty());
- if (ThreadBindingType::CORES == config._threadBindingType) {
- std::tie(_impl->_processMask, _impl->_ncpus) = GetProcessMask();
- }
- for (auto streamId = 0; streamId < config._streams; ++streamId) {
- _impl->_threads.emplace_back([=] {
- annotateSetThreadName((_impl->_name + "_" + std::to_string(streamId)).c_str());
- Stream stream;
- stream._streamId = streamId;
- stream._numaNodeId = numaNodes.at(streamId/((config._streams + numaNodes.size() - 1)/numaNodes.size()));
+ explicit Stream(Impl* impl) :
+ _impl(impl) {
+ {
+ std::lock_guard<std::mutex> lock{_impl->_streamIdMutex};
+ if (_impl->_streamIdQueue.empty()) {
+ _streamId = _impl->_streamId++;
+ } else {
+ _streamId = _impl->_streamIdQueue.front();
+ _impl->_streamIdQueue.pop();
+ }
+ }
+ _numaNodeId = _impl->_usedNumaNodes.at(
+ (_streamId % _impl->_config._streams)/
+ ((_impl->_config._streams + _impl->_usedNumaNodes.size() - 1)/_impl->_usedNumaNodes.size()));
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
- auto concurrency = (0 == config._threadsPerStream) ? tbb::task_arena::automatic : config._threadsPerStream;
- if (ThreadBindingType::NUMA == config._threadBindingType) {
- stream._taskArena.reset(new tbb::task_arena(tbb::task_arena::constraints(stream._numaNodeId, concurrency)));
- } else if ((0 != config._threadsPerStream) || ThreadBindingType::CORES == config._threadBindingType) {
- stream._taskArena.reset(new tbb::task_arena(concurrency));
- if (ThreadBindingType::CORES == config._threadBindingType) {
- if (nullptr != _impl->_processMask) {
- stream._pinningObserver.reset(new PinningObserver{*stream._taskArena,
- _impl->_processMask,
- _impl->_ncpus,
- stream._streamId,
- config._threadsPerStream,
- config._threadBindingStep,
- config._threadBindingOffset});
+ auto concurrency = (0 == _impl->_config._threadsPerStream) ? tbb::task_arena::automatic : _impl->_config._threadsPerStream;
+ if (ThreadBindingType::NUMA == _impl->_config._threadBindingType) {
+ _taskArena.reset(new tbb::task_arena{tbb::task_arena::constraints{_numaNodeId, concurrency}});
+ } else if ((0 != _impl->_config._threadsPerStream) || (ThreadBindingType::CORES == _impl->_config._threadBindingType)) {
+ _taskArena.reset(new tbb::task_arena{concurrency});
+ if (ThreadBindingType::CORES == _impl->_config._threadBindingType) {
+ CpuSet processMask;
+ int ncpus = 0;
+ std::tie(processMask, ncpus) = GetProcessMask();
+ if (nullptr != processMask) {
+ _observer.reset(new Observer{*_taskArena,
+ std::move(processMask),
+ ncpus,
+ _streamId,
+ _impl->_config._threadsPerStream,
+ _impl->_config._threadBindingStep,
+ _impl->_config._threadBindingOffset});
+ _observer->observe(true);
}
}
}
#elif IE_THREAD == IE_THREAD_OMP
- omp_set_num_threads(config._threadsPerStream);
- if (!checkOpenMpEnvVars(false) && (ThreadBindingType::NONE != config._threadBindingType)) {
- if (nullptr != _impl->_processMask) {
- parallel_nt(config._threadsPerStream, [&] (int threadIndex, int threadsPerStream) {
- int thrIdx = stream._streamId * threadsPerStream + threadIndex + config._threadBindingOffset;
- PinThreadToVacantCore(thrIdx, config._threadBindingStep, _impl->_ncpus, _impl->_processMask);
+ omp_set_num_threads(_impl->_config._threadsPerStream);
+ if (!checkOpenMpEnvVars(false) && (ThreadBindingType::NONE != _impl->_config._threadBindingType)) {
+ CpuSet processMask;
+ int ncpus = 0;
+ std::tie(processMask, ncpus) = GetProcessMask();
+ if (nullptr != processMask) {
+ parallel_nt(_impl->_config._threadsPerStream, [&] (int threadIndex, int threadsPerStream) {
+ int thrIdx = _streamId * _impl->_config._threadsPerStream + threadIndex + _impl->_config._threadBindingOffset;
+ PinThreadToVacantCore(thrIdx, _impl->_config._threadBindingStep, ncpus, processMask);
});
}
}
#elif IE_THREAD == IE_THREAD_SEQ
- if (ThreadBindingType::NUMA == config._threadBindingType) {
- PinCurrentThreadToSocket(stream._numaNodeId);
- } else if (ThreadBindingType::CORES == config._threadBindingType) {
- PinThreadToVacantCore(stream._streamId + config._threadBindingOffset, config._threadBindingStep, _impl->_ncpus, _impl->_processMask);
+ if (ThreadBindingType::NUMA == _impl->_config._threadBindingType) {
+ PinCurrentThreadToSocket(_numaNodeId);
+ } else if (ThreadBindingType::CORES == _impl->_config._threadBindingType) {
+ CpuSet processMask;
+ int ncpus = 0;
+ std::tie(processMask, ncpus) = GetProcessMask();
+ if (nullptr != processMask) {
+ PinThreadToVacantCore(_streamId + _impl->_config._threadBindingOffset, _impl->_config._threadBindingStep, ncpus, processMask);
+ }
}
#endif
- _impl->_localStream.local() = &stream;
- for (bool stopped = false; !stopped;) {
- Task currentTask;
- { // waiting for the new task or for stop signal
- std::unique_lock<std::mutex> lock(_impl->_mutex);
- _impl->_queueCondVar.wait(lock, [&] { return !_impl->_taskQueue.empty() || (stopped = _impl->_isStopped); });
- if (!_impl->_taskQueue.empty()) {
- currentTask = std::move(_impl->_taskQueue.front());
- _impl->_taskQueue.pop();
+ }
+ ~Stream() {
+ {
+ std::lock_guard<std::mutex> lock{_impl->_streamIdMutex};
+ _impl->_streamIdQueue.push(_streamId);
+ }
+#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
+ if (nullptr != _observer) {
+ _observer->observe(false);
+ }
+#endif
+ }
+
+ Impl* _impl = nullptr;
+ int _streamId = 0;
+ int _numaNodeId = 0;
+ bool _execute = false;
+ std::queue<Task> _taskQueue;
+#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
+ std::unique_ptr<tbb::task_arena> _taskArena;
+ std::unique_ptr<Observer> _observer;
+#endif
+ };
+
+ explicit Impl(const Config& config) :
+ _config{config},
+ _streams([this] {
+ return std::make_shared<Impl::Stream>(this);
+ }) {
+ auto numaNodes = getAvailableNUMANodes();
+ std::copy_n(std::begin(numaNodes),
+ std::min(std::max(static_cast<std::size_t>(1),
+ static_cast<std::size_t>(_config._streams)),
+ numaNodes.size()),
+ std::back_inserter(_usedNumaNodes));
+ for (auto streamId = 0; streamId < _config._streams; ++streamId) {
+ _threads.emplace_back([this, streamId] {
+ annotateSetThreadName((_config._name + "_" + std::to_string(streamId)).c_str());
+ for (bool stopped = false; !stopped;) {
+ Task task;
+ {
+ std::unique_lock<std::mutex> lock(_mutex);
+ _queueCondVar.wait(lock, [&] { return !_taskQueue.empty() || (stopped = _isStopped); });
+ if (!_taskQueue.empty()) {
+ task = std::move(_taskQueue.front());
+ _taskQueue.pop();
+ }
+ }
+ if (task) {
+ Execute(task, *(_streams.local()));
}
}
+ });
+ }
+ }
- if (currentTask) {
+ void Enqueue(Task task) {
+ {
+ std::lock_guard<std::mutex> lock(_mutex);
+ _taskQueue.emplace(std::move(task));
+ }
+ _queueCondVar.notify_one();
+ }
+
+ void Execute(const Task& task, Stream& stream) {
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
- if (nullptr != stream._taskArena) {
- stream._taskArena->execute(std::move(currentTask));
- } else {
- currentTask();
- }
+ auto& arena = stream._taskArena;
+ if (nullptr != arena) {
+ arena->execute(std::move(task));
+ } else {
+ task();
+ }
#else
- currentTask();
+ task();
#endif
+ }
+
+ void Defer(Task task) {
+ auto& stream = *(_streams.local());
+ stream._taskQueue.push(std::move(task));
+ if (!stream._execute) {
+ stream._execute = true;
+ try {
+ while (!stream._taskQueue.empty()) {
+ Execute(stream._taskQueue.front(), stream);
+ stream._taskQueue.pop();
}
- }
- });
+ } catch(...) {}
+ stream._execute = false;
+ }
}
+
+ Config _config;
+ std::mutex _streamIdMutex;
+ int _streamId = 0;
+ std::queue<int> _streamIdQueue;
+ std::vector<std::thread> _threads;
+ std::mutex _mutex;
+ std::condition_variable _queueCondVar;
+ std::queue<Task> _taskQueue;
+ bool _isStopped = false;
+ std::vector<int> _usedNumaNodes;
+ ThreadLocal<std::shared_ptr<Stream>> _streams;
+};
+
+
+int CPUStreamsExecutor::GetStreamId() {
+ auto stream = _impl->_streams.local();
+ return stream->_streamId;
+}
+
+int CPUStreamsExecutor::GetNumaNodeId() {
+ auto stream = _impl->_streams.local();
+ return stream->_numaNodeId;
+}
+
+CPUStreamsExecutor::CPUStreamsExecutor(const IStreamsExecutor::Config& config) :
+ _impl{new Impl{config}} {
}
CPUStreamsExecutor::~CPUStreamsExecutor() {
}
}
+void CPUStreamsExecutor::Execute(Task task) {
+ _impl->Defer(std::move(task));
+}
+
void CPUStreamsExecutor::run(Task task) {
- {
- std::lock_guard<std::mutex> lock(_impl->_mutex);
- _impl->_taskQueue.emplace(std::move(task));
+ if (0 == _impl->_config._streams) {
+ _impl->Defer(std::move(task));
+ } else {
+ _impl->Enqueue(std::move(task));
}
- _impl->_queueCondVar.notify_one();
}
} // namespace InferenceEngine
return {};
}
-} // namespace InferenceEngine
+} // namespace InferenceEngine
\ No newline at end of file
std::string XMLParseUtils::GetStrAttr(const pugi::xml_node& node, const char* str) {
auto attr = node.attribute(str);
if (attr.empty())
- THROW_IE_EXCEPTION << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset "
+ THROW_IE_EXCEPTION << "node <" << node.name() << "> is missing mandatory attribute: '" << str << "' at offset "
<< node.offset_debug();
return attr.value();
}
}
std::shared_ptr<ngraph::Node> ngraphNode;
- if (opsets.count(params.version)) {
- auto opset = opsets.at(params.version);
-
- for (const auto& creator : creators) {
- if (creator->shouldCreate(params.type)) {
- ngraphNode = creator->createLayer(inputs, node, weights, params);
- break;
+ // Try to create operation from creators
+ for (const auto& creator : creators) {
+ if (creator->shouldCreate(params.type)) {
+ bool useCreator = false;
+ // Check that opset is registered
+ useCreator |= opsets.find(params.version) == opsets.end();
+ if (!useCreator) {
+ // Check that creator can create operation with the version from opset
+ const auto opset = opsets.at(params.version);
+                    // Opset should contain the same version of the operation, or not contain an operation with the current type
+ useCreator |= opset.contains_type(creator->getNodeType()) || !opset.contains_type(params.type);
}
+ if (useCreator)
+ ngraphNode = creator->createLayer(inputs, node, weights, params);
+ break;
}
+ }
- if (!ngraphNode) {
- if (!opset.contains_type(params.type)) {
- THROW_IE_EXCEPTION << "Opset " << params.version << " doesn't contain the operation with type: " << params.type;
- }
+ // Try to create operation from loaded opsets
+ if (!ngraphNode && opsets.count(params.version)) {
+ auto opset = opsets.at(params.version);
- ngraphNode = std::shared_ptr<ngraph::Node>(opset.create(params.type));
- ngraphNode->set_arguments(inputs);
- XmlDeserializer visitor(node);
- if (ngraphNode->visit_attributes(visitor))
- ngraphNode->constructor_validate_and_infer_types();
+ if (!opset.contains_type(params.type)) {
+ THROW_IE_EXCEPTION << "Opset " << params.version << " doesn't contain the operation with type: " << params.type;
}
+
+ ngraphNode = std::shared_ptr<ngraph::Node>(opset.create(params.type));
+ ngraphNode->set_arguments(inputs);
+ XmlDeserializer visitor(node);
+ if (ngraphNode->visit_attributes(visitor))
+ ngraphNode->constructor_validate_and_infer_types();
}
+ // Create GenericIE operation for backward compatibility
if (!ngraphNode && (params.version == "experimental" || params.version == "extension")) {
// Try to create Generic node for backward compatibility
std::map<std::string, Parameter> parameters;
const GenericLayerParams& layerParsePrms) = 0;
bool shouldCreate(const std::string& nodeType) const;
-
- std::shared_ptr<ngraph::Node> createOptionalParameter(const GenericLayerParams::LayerPortData& port);
+ virtual ngraph::NodeTypeInfo getNodeType() const = 0;
};
template <class T>
std::shared_ptr<ngraph::Node> createLayer(const ngraph::OutputVector& inputs, const pugi::xml_node& node,
const Blob::CPtr& weights,
const GenericLayerParams& layerParsePrms) override;
+ ngraph::NodeTypeInfo getNodeType() const override {
+ return T::type_info;
+ }
};
std::shared_ptr<ngraph::Node> createNode(const ngraph::OutputVector& inputs, const pugi::xml_node& node,
std::vector<size_t> shape;
if (!getParameters<size_t>(node.child("data"), name, shape)) return;
static_cast<ngraph::Strides&>(*a) = ngraph::Strides(shape);
+ } else if (auto a = ngraph::as_type<ngraph::AttributeAdapter<ngraph::op::TopKSortType>>(&adapter)) {
+ if (!getStrAttribute(node.child("data"), name, val)) return;
+ static_cast<ngraph::op::TopKSortType&>(*a) = ngraph::as_enum<ngraph::op::TopKSortType>(val);
+ } else if (auto a = ngraph::as_type<ngraph::AttributeAdapter<ngraph::op::TopKMode>>(&adapter)) {
+ if (!getStrAttribute(node.child("data"), name, val)) return;
+ static_cast<ngraph::op::TopKMode&>(*a) = ngraph::as_enum<ngraph::op::TopKMode>(val);
}
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<double>& adapter) override {
res->params = params;
return res;
});
+
+ addSpecificCreator({"ScatterElementsUpdate"}, [](const std::shared_ptr<::ngraph::Node>& node,
+ const std::map<std::string, std::string> params) -> CNNLayerPtr {
+ LayerParams attrs = {node->get_friendly_name(), node->description(),
+ details::convertPrecision(node->get_output_element_type(0))};
+ auto res = std::make_shared<ScatterElementsUpdateLayer>(attrs);
+ res->params = params;
+ return res;
+ });
}
CNNLayerPtr InferenceEngine::details::CNNLayerCreator::create() {
}
size_t inputCount(0);
for (size_t i = 0; i < layer->get_input_size(); i++) {
- const auto &input = layer->get_inputs()[i];
- if (isInternalLayer(input.get_output().get_node(), op_names, keep_constants)) continue;
+ const auto &constant = ngraph::as_type_ptr<ngraph::op::Constant>(layer->get_inputs()[i].get_output().get_node());
+ if (constant && isInternalConstLayer(constant, layer, keep_constants)) {
+ continue;
+ }
inputCount++;
}
cnnLayer->insData.resize(inputCount);
namespace InferenceEngine {
Precision CNNNetwork::getPrecision() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return actual->getPrecision();
}
};
void CNNNetwork::AddExtension(InferenceEngine::IShapeInferExtensionPtr extension) {
- CALL_STATUS_FNC(AddExtension, extension);
+ CALL_STATUS_FNC(AddExtension, extension);
}
CNNLayer::CNNLayer(const LayerParams& prms)
THROW_IE_EXCEPTION << layer->name << " Incorrect number of 'updates' tensors dimension";
Precision inIdxPrecision = layer->insData[INDICES].lock()->getTensorDesc().getPrecision();
- if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32)
- THROW_IE_EXCEPTION << layer->name << " Incorrect input 'Indices' precision. Only FP32 or I32 are supported!";
+ if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32 && inIdxPrecision != Precision::I64)
+ THROW_IE_EXCEPTION << layer->name << " Incorrect input 'Indices' precision. Only FP32 or I32 or I64 are supported!";
Precision inAxisPrecision = layer->insData[AXIS].lock()->getTensorDesc().getPrecision();
- if (inAxisPrecision != Precision::FP32 && inAxisPrecision != Precision::I32)
- THROW_IE_EXCEPTION << layer->name << " Incorrect input 'Axis' precision. Only FP32 or I32 are supported!";
+ // Fix: check the Axis precision (not the Indices precision) for I64 support.
+ if (inAxisPrecision != Precision::FP32 && inAxisPrecision != Precision::I32 && inAxisPrecision != Precision::I64)
+ THROW_IE_EXCEPTION << layer->name << " Incorrect input 'Axis' precision. Only FP32 or I32 or I64 are supported!";
if (layer->insData[DATA].lock()->getTensorDesc().getPrecision() !=
layer->insData[UPDATES].lock()->getTensorDesc().getPrecision())
if (!_body_reshaper)
THROW_IE_EXCEPTION << "Request of apply reshape results while shape infer was not finished";
_body_reshaper->apply();
+ _body_reshaper.reset(); // WA: reset _body_reshaper to release ownership for input data
}
private:
/**
* @brief Perform shape inference for the given input shapes but not apply it.
- * In case of cusses call apply() method.
+ * In case of success call apply() method.
* @param inputShapes - Map of input names (data) to their input shapes.
* @throws exception if shape infer failed without corruption of original shapes
*/
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <algorithm>
+#include "ie_layers.h"
+#include "low_precision_transformations/transformation_context.hpp"
+#include "low_precision_transformations/layer_transformation.hpp"
+
+namespace InferenceEngine {
+namespace details {
+
+// Low-precision transformation for Power layers with power == 1: the layer's
+// linear scale/offset is folded into the parent ScaleShift weights/biases and
+// the Power layer is removed from the network (see power.cpp).
+class INFERENCE_ENGINE_API_CLASS(PowerTransformation) : public LayerTransformation {
+public:
+ PowerTransformation(const Params& params) : LayerTransformation(params) {}
+ ~PowerTransformation() override {}
+ // Performs the fusion; no-op when canBeTransformed() returns false.
+ void transform(TransformationContext& context, CNNLayer& layer) const override;
+ // True only for single-input Power layers with power == 1 whose parent is ScaleShift.
+ bool canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const override;
+};
+
+} // namespace details
+} // namespace InferenceEngine
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision_transformations/power.hpp"
+
+#include <algorithm>
+#include <details/caseless.hpp>
+#include <string>
+#include <memory>
+#include <vector>
+
+#include "low_precision_transformations/common/ie_lpt_exception.hpp"
+#include "low_precision_transformations/network_helper.hpp"
+
+using namespace InferenceEngine;
+using namespace InferenceEngine::details;
+
+// Returns true when the Power layer can be folded into its parent:
+// it must pass the base-class checks, be a single-input Power layer with
+// power == 1, and have a ScaleShift parent.
+bool PowerTransformation::canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const {
+ if (!LayerTransformation::canBeTransformed(context, layer)) {
+ return false;
+ }
+
+ if (layer.insData.size() != 1) {
+ THROW_IE_LPT_EXCEPTION(layer) << "layer inputs '" << layer.insData.size() << "' is not correct";
+ }
+
+ if (!CaselessEq<std::string>()(layer.type, "Power")) {
+ THROW_IE_LPT_EXCEPTION(layer) << "layer '" << layer.name << "' is not correct";
+ }
+
+ const PowerLayer* powerLayer = dynamic_cast<const PowerLayer*>(&layer);
+ if (powerLayer == nullptr) {
+ THROW_IE_LPT_EXCEPTION(layer) << "unexpected Power layer type";
+ }
+ // Only a pure linear Power (power == 1) can be expressed as scale/shift.
+ if (powerLayer->power != 1.f) {
+ return false;
+ }
+
+ // Guard against a missing parent instead of dereferencing a null pointer,
+ // and state the condition positively (was: !(parent->type != "ScaleShift")).
+ const CNNLayerPtr parent = CNNNetworkHelper::getParent(layer, 0);
+ return parent != nullptr && parent->type == "ScaleShift";
+}
+
+// Folds a linear Power layer (y = scale * x + offset) into the parent
+// ScaleShift blobs, then removes the Power layer from the network.
+void PowerTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
+ if (!canBeTransformed(context, layer)) {
+ return;
+ }
+
+ const PowerLayer* powerLayer = dynamic_cast<const PowerLayer*>(&layer);
+ if (powerLayer == nullptr) {
+ THROW_IE_LPT_EXCEPTION(layer) << "unexpected Power layer type";
+ }
+
+ const CNNLayerPtr parent = CNNNetworkHelper::getParent(layer, 0);
+
+ // weights' = weights * scale
+ Blob::Ptr weightsBlob = CNNNetworkHelper::getBlob(parent, "weights");
+ auto wBuffer = weightsBlob->buffer().as<float*>();
+ for (size_t channel = 0ul; channel < weightsBlob->size(); ++channel) {
+ wBuffer[channel] = wBuffer[channel] * powerLayer->scale;
+ }
+
+ // biases' = biases * scale + offset
+ Blob::Ptr shiftsBlob = CNNNetworkHelper::getBlob(parent, "biases");
+ auto sBuffer = shiftsBlob->buffer().as<float*>();
+ for (size_t channel = 0ul; channel < shiftsBlob->size(); ++channel) {
+ sBuffer[channel] = sBuffer[channel] * powerLayer->scale + powerLayer->offset;
+ }
+
+ const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(layer);
+ CNNNetworkHelper::removeLayer(context.network, std::make_shared<CNNLayer>(layer));
+ context.removeLayer(layer);
+ if (children.empty()) {
+ // The removed Power layer was a network output: preserve the output name
+ // by renaming the parent to it. (Removed unused local 'originalName'.)
+ CNNNetworkHelper::renameLayer(context.network, parent->name, layer.name);
+ }
+}
using namespace InferenceEngine;
using namespace InferenceEngine::details;
-static const std::unordered_set<std::string> defaultIgnoreWithParents = {
+static const char * defaultIgnoreWithParents[] = {
"Convolution",
"FakeQuantize"
};
ScaleShiftToConvolutionTransformation::ScaleShiftToConvolutionTransformation(const Params& params) :
WeightableLayerTransformation(params),
groupSize(1ul),
- ignoreWithParents(defaultIgnoreWithParents) {
+ ignoreWithParents(defaultIgnoreWithParents, defaultIgnoreWithParents +
+ sizeof(defaultIgnoreWithParents) / sizeof(defaultIgnoreWithParents[0])) {
}
void ScaleShiftToConvolutionTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
#include "low_precision_transformations/permute.hpp"
#include "low_precision_transformations/pooling.hpp"
#include "low_precision_transformations/resample.hpp"
+#include "low_precision_transformations/power.hpp"
#include "low_precision_transformations/reshape.hpp"
#include "low_precision_transformations/scaleshift_to_convolution.hpp"
#include "low_precision_transformations/squeeze.hpp"
{ "ReLU", LayerTransformationPtr(new ActivationTransformation(params)) },
{ "MVN", LayerTransformationPtr(new MvnTransformation(params)) },
{ "Eltwise", LayerTransformationPtr(new EltwiseTransformation(params)) },
- { "Resample", LayerTransformationPtr(new ResampleTransformation(params)) }
+ { "Resample", LayerTransformationPtr(new ResampleTransformation(params)) },
+ { "Power", LayerTransformationPtr(new PowerTransformation(params)) }
}),
std::map<std::string, LayerTransformationPtr>({
{ "FakeQuantize", LayerTransformationPtr(new FuseFakeQuantizeAndScaleShiftTransformation(params)) },
addVersionDefines(mkldnn_plugin.cpp CI_BUILD_NUMBER MKL_VERSION)
include_directories(
- ${IE_MAIN_SOURCE_DIR}/include
$<TARGET_PROPERTY:inference_engine_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>
- ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/mkldnn
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ ${CMAKE_BINARY_DIR}/include)
+
+include_directories(SYSTEM
${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/src/common
${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/src/cpu
- ${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/include
- ${CMAKE_BINARY_DIR}/include/
-)
+ ${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/include)
if (GEMM STREQUAL "MKL")
log_rpath_from_dir(MKL "${MKL}/lib")
set_ie_threading_interface_for(mkldnn_plugin_layers_no_opt_s)
target_compile_definitions(mkldnn_plugin_layers_no_opt_s PRIVATE "USE_STATIC_IE;IMPLEMENT_INFERENCE_ENGINE_PLUGIN")
-set(object_libraries mkldnn_plugin_layers_no_opt)
-set(mkldnn_plugin_object_libraries mkldnn_plugin_layers_no_opt_s)
+list(APPEND object_libraries mkldnn_plugin_layers_no_opt)
+list(APPEND mkldnn_plugin_object_libraries mkldnn_plugin_layers_no_opt_s)
# SSE 4.2 optimized layers
InputsDataMap inputs = network.getInputsInfo();
OutputsDataMap outputs = network.getOutputsInfo();
for (auto iter : sortedLayers) {
+ if (_skipmarking.find(iter->type) != _skipmarking.end()) {
+ continue;
+ }
for (size_t o = 0; o < iter->outData.size(); o++) {
if (inputs.find(iter->outData[o]->getName()) == inputs.end()
&& outputs.find(iter->outData[o]->getName()) == outputs.end()
// 2b. go over all unknown layers for this algo and mark them as fp32 and add to the toAnalyzeTensors
// 2c. go over all inputs to _initbf16 and if they are fp32 - add them to the toAnalyzeTensors
for (auto iter : sortedLayers) {
+ if (_skipmarking.find(iter->type) != _skipmarking.end()) {
+ continue;
+ }
if (_initbf16.find(iter->type) == _initbf16.end()
&& _complementbf16.find(iter->type) == _complementbf16.end()
&& _multiinput.find(iter->type) == _multiinput.end()) {
const InferenceEngine::details::caseless_set<std::string> _initbf16 =
{ "convolution", "fullyconnected", "innerproduct" };
const InferenceEngine::details::caseless_set<std::string> _complementbf16 =
- { "relu", "pooling", "norm", "gather" };
+ { "relu", "tanh", "elu", "square", "abs", "sqrt", "linear", "bounded_relu", "soft_relu", "logistic",
+ "exp", "gelu", "clamp", "swish", "prelu", "pooling", "norm", "gather" };
const InferenceEngine::details::caseless_set<std::string> _multiinput =
{ "concat", "eltwise" };
+ const InferenceEngine::details::caseless_set<std::string> _skipmarking =
+ { "const" };
/**
* Tries to mark tensor as FP32 by analyzing of local consumers of the tensor. Do not mark if
MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network,
const Config &cfg,
const MKLDNNExtensionManager::Ptr& extMgr) :
- InferenceEngine::ExecutableNetworkThreadSafeDefault([&] ()->ITaskExecutor::Ptr {
- ExecutorManager *executorManager = ExecutorManager::getInstance();
-
- if (cfg.exclusiveAsyncRequests) {
- // special case when all InferRequests are muxed into a single queue
- return executorManager->getExecutor("CPU");;
- } else {
- const int env_threads = parallel_get_env_threads();
- const auto& numa_nodes = getAvailableNUMANodes();
- const auto numa_nodes_num = numa_nodes.size();
- auto streamExecutorConfig = cfg.streamExecutorConfig;
- // use logical cores only for single-socket targets in throughput mode
- const int hw_cores = streamExecutorConfig._streams > 1 && numa_nodes_num == 1 ? parallel_get_max_threads() : getNumberOfCPUCores();
- const int threads = streamExecutorConfig._threads ? streamExecutorConfig._threads : (env_threads ? env_threads : hw_cores);
- streamExecutorConfig._threadsPerStream = std::max(1, threads/streamExecutorConfig._streams);
- streamExecutorConfig._name = "CPUStreamsExecutor";
- return executorManager->getIdleCPUStreamsExecutor(streamExecutorConfig);
- }
- } ()),
+ InferenceEngine::ExecutableNetworkThreadSafeDefault{nullptr, nullptr},
extensionManager(extMgr),
_cfg{cfg},
_name{network.getName()} {
LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }),
"ScaleShift"));
transformer.transform(*_clonedNetwork);
- if (with_cpu_x86_bfloat16()) {
+
+ // Check if network is INT8 or Binary.
+ // BF16 transformations were disabled since CPU plug-in doesn't support mixed precision execution:
+ // BF16 + INT8 or BF16 + BIN.
+ bool isFloatModel = true;
+ CNNNetworkIterator i(&network);
+ while (i != CNNNetworkIterator()) {
+ if (CaselessEq<std::string>()((*i)->type, "FakeQuantize")) {
+ isFloatModel = false;
+ break;
+ }
+ i++;
+ }
+
+ if (with_cpu_x86_bfloat16() && isFloatModel) {
BF16Transformer bf16Transformer;
CNNNetwork cnnetwork(_clonedNetwork);
if (cfg.enforceBF16 == true) {
}
}
+ if (cfg.exclusiveAsyncRequests) {
+ // special case when all InferRequests are muxed into a single queue
+ _taskExecutor = ExecutorManager::getInstance()->getExecutor("CPU");
+ } else {
+ const int env_threads = parallel_get_env_threads();
+ const auto& numa_nodes = getAvailableNUMANodes();
+ const auto numa_nodes_num = numa_nodes.size();
+ auto streamExecutorConfig = cfg.streamExecutorConfig;
+ // use logical cores only for single-socket targets in throughput mode
+ const int hw_cores = streamExecutorConfig._streams > 1 && numa_nodes_num == 1 ? parallel_get_max_threads() : getNumberOfCPUCores();
+ const int threads = streamExecutorConfig._threads ? streamExecutorConfig._threads : (env_threads ? env_threads : hw_cores);
+ streamExecutorConfig._threadsPerStream = streamExecutorConfig._streams
+ ? std::max(1, threads/streamExecutorConfig._streams)
+ : threads;
+ streamExecutorConfig._name = "CPUStreamsExecutor";
+ _taskExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamExecutorConfig);
+ }
+ if (0 != cfg.streamExecutorConfig._streams) {
+ _callbackExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(
+ IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0, IStreamsExecutor::ThreadBindingType::NONE});
+ } else {
+ _callbackExecutor = _taskExecutor;
+ }
+
_graphs = decltype(_graphs){[&] {
// TODO: Remove `cloneNet` to `localNetwork` when `MKLDNNGraph::CreateGraph`
// is fixed and does not change content of network passed (CVS-26420)
Config engConfig = _graphs.begin()->get()->getProperty();
auto option = engConfig._config.find(CONFIG_KEY(CPU_THROUGHPUT_STREAMS));
IE_ASSERT(option != engConfig._config.end());
- result = IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, static_cast<unsigned int>(std::stoi(option->second)));
+ auto streams = std::stoi(option->second);
+ result = IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, static_cast<unsigned int>(
+ streams ? streams : 1));
} else {
THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork metric: " << name;
}
return activationNode &&
(activationNode->getAlgorithm() == eltwise_relu ||
(conv->getCnnLayer()->precision == Precision::FP32 &&
- conv->getCnnLayer()->insData[0].lock()->getPrecision() != Precision::BF16 &&
isOneOf(activationNode->getAlgorithm(), {eltwise_elu, eltwise_logistic, eltwise_bounded_relu, eltwise_clamp, eltwise_swish})));
};
auto isSutableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == FullyConnected &&
- node->getCnnLayer()->insData[0].lock()->getPrecision() != Precision::BF16 &&
node->getChildEdges().size() == 1;
};
bool isSutableConv = (node->getType() == Convolution) &&
node->getCnnLayer()->precision == Precision::FP32;
bool isSutableBinConv = node->getType() == BinaryConvolution;
- return (isSutableConv || isSutableBinConv) && node->getChildEdges().size() == 1 &&
- !(node->getCnnLayer()->insData[0].lock()->getPrecision() == Precision::BF16 &&
- node->getCnnLayer()->outData[0]->getPrecision() == Precision::FP32);
+ return (isSutableConv || isSutableBinConv) && node->getChildEdges().size() == 1;
};
auto isSutableChildNode = [](MKLDNNNodePtr node) {
auto isSutableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == Convolution &&
node->getChildEdges().size() == 1 &&
- node->getCnnLayer()->precision == Precision::FP32 &&
- !(node->getCnnLayer()->insData[0].lock()->getPrecision() == Precision::BF16 &&
- node->getCnnLayer()->outData[0]->getPrecision() == Precision::FP32);
+ node->getCnnLayer()->precision == Precision::FP32;
};
auto isSutableChildNode = [&](MKLDNNNodePtr node) {
#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
#include <transformations/convert_opset2_to_opset1/convert_opset2_to_opset1.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/op/fused/gelu.hpp>
Engine::~Engine() {
ExecutorManager::getInstance()->clear("CPUStreamsExecutor");
+ ExecutorManager::getInstance()->clear("CPUCallbackExecutor");
}
InferenceEngine::ExecutableNetworkInternal::Ptr
-Engine::LoadExeNetworkImpl(const ICore * /*core*/, const InferenceEngine::ICNNNetwork &network, const std::map<std::string, std::string> &config) {
+Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const std::map<std::string, std::string> &config) {
// verification of supported input
InferenceEngine::InputsDataMap _networkInputs;
network.getInputsInfo(_networkInputs);
// Note: instead of running all Conversion Transformations you can make up your own transformation pipeline
ngraph::pass::CommonOptimizations().run_on_function(nGraphFunc);
+ ngraph::pass::ConvertOpSet3ToOpSet2(transformations_callback).run_on_function(nGraphFunc);
ngraph::pass::ConvertOpSet2ToOpSet1(transformations_callback).run_on_function(nGraphFunc);
ngraph::pass::ConvertOpSet1ToLegacy(transformations_callback).run_on_function(nGraphFunc);
clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork);
~Engine() override;
InferenceEngine::ExecutableNetworkInternal::Ptr
- LoadExeNetworkImpl(const InferenceEngine::ICore * core, const InferenceEngine::ICNNNetwork &network,
+ LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network,
const std::map<std::string, std::string> &config) override;
void AddExtension(InferenceEngine::IExtensionPtr extension) override;
- /**
- * @deprecated
- * @param config
- */
+
void SetConfig(const std::map<std::string, std::string> &config) override;
InferenceEngine::Parameter GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override;
std::map<std::string, IShapeInferImpl::Ptr> si_list;
};
-template <mkldnn::impl::cpu::cpu_isa_t T>
-class TExtensionsHolder : public ExtensionsHolder {};
-
template<mkldnn::impl::cpu::cpu_isa_t Type>
class MKLDNNExtensions : public IExtension {
public:
}
static std::shared_ptr<ExtensionsHolder> GetExtensionsHolder() {
- static std::shared_ptr<TExtensionsHolder<Type>> localHolder;
+ static std::shared_ptr<ExtensionsHolder> localHolder;
if (localHolder == nullptr) {
- localHolder = std::make_shared<TExtensionsHolder<Type>>();
+ localHolder = std::make_shared<ExtensionsHolder>();
}
- return std::dynamic_pointer_cast<ExtensionsHolder>(localHolder);
+ return localHolder;
}
private:
});
}
-std::multimap<InferenceEngine::SizeVector, MKLDNNPermuteNode::PermuteImpl> MKLDNNPermuteNode::OptimizedCases = {
+const std::multimap<InferenceEngine::SizeVector, MKLDNNPermuteNode::PermuteImpl> MKLDNNPermuteNode::OptimizedCases = {
{{0, 2, 3, 1}, MKLDNNPermuteNode::PermuteImpl(permute_to_0231, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) {
return true;
})}, // NCHW -> NHWC case
isApplicable isValidParams;
};
- static std::multimap<InferenceEngine::SizeVector, PermuteImpl> OptimizedCases;
+ static const std::multimap<InferenceEngine::SizeVector, PermuteImpl> OptimizedCases;
std::shared_ptr<jit_uni_permute_kernel> permute_kernel;
};
invertVectorCopyUtoI(poolingLayer->_stride, stride);
invertVectorCopyUtoI(poolingLayer->_kernel, kernel);
auto allPads = getPaddings(*poolingLayer);
- invertVectorCopyUtoI(allPads.begin, paddingL);
- invertVectorCopyUtoI(allPads.end, paddingR);
+ invertVectorCopyUtoI(allPads.begin, data_pad_begin);
+ invertVectorCopyUtoI(allPads.end, data_pad_end);
+ effective_pad_begin = data_pad_begin;
+ effective_pad_end.resize(data_pad_end.size());
auto parentDims = getParentEdgeAt(0)->getDims();
auto childDims = getChildEdgeAt(0)->getDims();
if ((parentDims.ndims() < 4) || (parentDims.ndims() > 5))
THROW_IE_EXCEPTION << "Pooling layer. Unsupported mode. Only 4D and 5D blobs are supported as input.";
- for (int i = 0; i < paddingR.size(); i++) {
+ for (int i = 0; i < effective_pad_end.size(); i++) {
int krn = kernel[i];
int src = getParentEdgeAt(0)->getDims()[2 + i];
int dst = getChildEdgeAt(0)->getDims()[2 + i];
- int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
- paddingR[i] = (dst - calc_dst) * stride[i];
+ int calc_dst = (src - krn + data_pad_begin[i]) / stride[i] + 1;
+ effective_pad_end[i] = (dst - calc_dst) * stride[i];
}
if (inputPrecision == Precision::I8 || inputPrecision == Precision::U8) {
// i8 layers supports only ndhwc and nhwc layouts
algorithm alg;
if (type == PoolingLayer::PoolType::AVG) {
bool not_zero_l = false;
- for (auto lr : paddingL) {
+ for (auto lr : data_pad_begin) {
if (lr) {
not_zero_l = true;
break;
}
}
- if (!exclude_pad && not_zero_l)
+ bool not_zero_r = false;
+ for (auto pr : data_pad_end) {
+ if (pr) {
+ not_zero_r = true;
+ break;
+ }
+ }
+ if (!exclude_pad && (not_zero_l || not_zero_r))
alg = pooling_avg_include_padding;
else
alg = pooling_avg_exclude_padding;
std::shared_ptr<pooling_forward::desc> desc_ptr(
new pooling_forward::desc(prop_kind::forward_scoring, alg,
in_candidate, out_candidate,
- stride, kernel, paddingL, paddingR,
+ stride, kernel, effective_pad_begin, effective_pad_end,
mkldnn::padding_kind::zero));
- bool not_zero_r = false;
- for (auto pr : paddingR) {
- if (pr) {
- not_zero_r = true;
- break;
- }
- }
- if (alg == pooling_avg_include_padding && not_zero_r) {
+ if (alg == pooling_avg_include_padding) {
// In case of AVG including paddings the norm coeff should be calculated
// with tacking into account original pads. So we need to restore
- // original values (R_padding = L_padding).
+ // original values for end paddings.
//
// WA. Because mkldnn uses different formula to calculate AVG norm coeff
// in compare with Caffe. In mkldnn coeff is always 1/(KH*KW)
- for (int i = 0; i < paddingL.size(); i++) desc_ptr->data.padding[1][i] = paddingL[i];
+ for (int i = 0; i < data_pad_end.size(); i++) {
+ if (data_pad_end[i] != effective_pad_end[i])
+ desc_ptr->data.padding[1][i] = static_cast<ptrdiff_t>(data_pad_end[i]);
+ }
}
descs.emplace_back(desc_ptr);
InferenceEngine::PoolingLayer::PoolType type = InferenceEngine::PoolingLayer::MAX;
bool exclude_pad = false;
std::vector<ptrdiff_t> stride;
- std::vector<ptrdiff_t> paddingL;
- std::vector<ptrdiff_t> paddingR;
std::vector<ptrdiff_t> kernel;
+ /// Effective padding. Used to define correct output shape by MKLDNN
+ /// reshape formula: (iw - kernel + pad_l + pad_r) / strides[i - 2] + 1
+ /// should be passed into pooling desc constructor.
+ std::vector<ptrdiff_t> effective_pad_begin;
+ std::vector<ptrdiff_t> effective_pad_end;
+
+ /// Effective pad value. Describe how much zero element added to input
+ /// data tensor. May be less than "Effective padding" values.
+ /// If pooling window is out of this padding, the region of averaging
+ /// is decreased.
+ std::vector<ptrdiff_t> data_pad_begin;
+ std::vector<ptrdiff_t> data_pad_end;
+
InferenceEngine::Precision inputPrecision = InferenceEngine::Precision::FP32;
InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32;
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_resample_nearest_kernel_f32)
explicit jit_uni_resample_nearest_kernel_f32(jit_resample_config_params jcp, const mkldnn_primitive_attr &attr)
- : jit_uni_resample_nearest_kernel(jcp, attr), jit_generator() {
+ : jit_uni_resample_nearest_kernel(jcp, attr), jit_generator() {
const auto &p = attr_.post_ops_;
for (int i = 0; i < p.len_; i++) {
auto &post_op = p.entry_[i];
}
}
+ if (inputPrecision == Precision::BF16) {
+ inputPrecision = Precision::FP32;
+ }
+
+ if (outputPrecision == Precision::BF16) {
+ outputPrecision = Precision::FP32;
+ }
+
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision);
auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision);
// f32 and no fused, f32->input is f32, no fuse->output is f32
void MKLDNNResampleNode::NearestNeighbor_PLN(const float *in_ptr_, float *out_ptr_, int B, int C, int ID, int IH, int IW,
- float fx, float fy, float fz, int OD, int OH, int OW) {
+ float fx, float fy, float fz, int OD, int OH, int OW) {
std::vector<int> index_buffer(OD * OH * OW);
for (int oz = 0; oz < OD; oz++) {
float iz = oz * fz;
// int8->input may be int8, fused->output may be int8
template <typename in_data_t, typename out_data_t>
void MKLDNNResampleNode::NearestNeighbor_BLK(const in_data_t *in_ptr_, out_data_t *out_ptr_, int B, int C, int ID, int IH, int IW,
- float fx, float fy, float fz, int OD, int OH, int OW) {
+ float fx, float fy, float fz, int OD, int OH, int OW) {
std::vector<int> index_d(OD);
std::vector<int> index_h(OH);
std::vector<int> index_w(OW);
template <typename in_data_t, typename out_data_t>
void MKLDNNResampleNode::LinearInterpolation(const in_data_t *in_ptr_, out_data_t *out_ptr_, int B, int C, int ID, int IH, int IW,
- float fx, float fy, float fz, int OD, int OH, int OW, int kernel_width, bool antialias) {
+ float fx, float fy, float fz, int OD, int OH, int OW, int kernel_width, bool antialias) {
if (IW == OW && IH == OH && ID == OD) {
size_t size = B * C * ID * IH * IW;
if (input_prec == Precision::FP32) {
--- /dev/null
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Build configuration for the Multi-Device (MULTI) Inference Engine plugin.
+set (TARGET_NAME "MultiDevicePlugin")
+
+# Enable link-time optimization when the build opts in.
+if(ENABLE_LTO)
+ ie_enable_lto()
+endif()
+
+# NOTE(review): file(GLOB) misses newly added files until the next reconfigure;
+# an explicit source list would be preferable here.
+file(GLOB SOURCES
+ ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
+)
+
+file(GLOB HEADERS
+ ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp
+)
+
+# Registers the plugin under device name "MULTI" and stamps version defines
+# into multi_device.cpp.
+ie_add_plugin(NAME ${TARGET_NAME}
+ DEVICE_NAME "MULTI"
+ SOURCES ${SOURCES} ${HEADERS}
+ VERSION_DEFINES_FOR multi_device.cpp)
+
+target_link_libraries(${TARGET_NAME} PRIVATE inference_engine)
+
+set_ie_threading_interface_for(${TARGET_NAME})
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <string>
+#include <vector>
+#include <iostream>
+#include <memory>
+#include <utility>
+#include <map>
+#include <unordered_map>
+
+#include "ie_metric_helpers.hpp"
+#include <ie_api.h>
+#include <cpp_interfaces/base/ie_plugin_base.hpp>
+#include <cpp_interfaces/base/ie_infer_async_request_base.hpp>
+#include <multi-device/multi_device_config.hpp>
+#include <ie_plugin_config.hpp>
+#include "multi_device.hpp"
+
+namespace MultiDevicePlugin {
+ using namespace InferenceEngine;
+// ------------------------------MultiDeviceInferRequest----------------------------
+// Pre-allocates host-side blobs for every network input and output so the
+// request owns its own buffers independent of whichever device-specific
+// worker request eventually executes it (see SetBlobsToAnotherRequest).
+MultiDeviceInferRequest::MultiDeviceInferRequest(const InputsDataMap& networkInputs,
+ const OutputsDataMap& networkOutputs)
+ : InferRequestInternal(networkInputs, networkOutputs) {
+ // Allocate all input blobs
+ for (const auto &it : networkInputs) {
+ Layout l = it.second->getLayout();
+ Precision p = it.second->getPrecision();
+ SizeVector dims = it.second->getTensorDesc().getDims();
+
+ TensorDesc desc = TensorDesc(p, dims, l);
+ _inputs[it.first] = make_blob_with_precision(desc);
+ _inputs[it.first]->allocate();
+ }
+ // Allocate all output blobs
+ for (const auto &it : networkOutputs) {
+ Layout l = it.second->getLayout();
+ Precision p = it.second->getPrecision();
+ SizeVector dims = it.second->getTensorDesc().getDims();
+
+ TensorDesc desc = TensorDesc(p, dims, l);
+ _outputs[it.first] = make_blob_with_precision(desc);
+ _outputs[it.first]->allocate();
+ }
+}
+
+// Shares this request's input/output blobs with the given worker request so
+// the device executes directly on the user's buffers (no extra copies).
+void MultiDeviceInferRequest::SetBlobsToAnotherRequest(InferRequest& req) {
+ for (const auto &it : _networkInputs) {
+ Blob::Ptr blob;
+ auto &name = it.first;
+ // this request is already in BUSY state, so using the internal functions safely
+ GetBlob(name.c_str(), blob);
+ req.SetBlob(name.c_str(), blob);
+ }
+ for (const auto &it : _networkOutputs) {
+ Blob::Ptr blob;
+ auto &name = it.first;
+ // this request is already in BUSY state, so using the internal functions safely
+ GetBlob(name.c_str(), blob);
+ req.SetBlob(name.c_str(), blob);
+ }
+}
+
+// Builds a two-stage async pipeline:
+//  1) scheduled on the executable network, which selects an idle worker
+//     request (stored in the thread-local _thisWorkerInferRequest) and shares
+//     the user's blobs with it;
+//  2) scheduled on ThisRequestExecutor, which hands the continuation task to
+//     the worker and kicks off its StartAsync(); on completion, status is
+//     checked and (optionally) perf counters are harvested.
+MultiDeviceAsyncInferRequest::MultiDeviceAsyncInferRequest(
+ const MultiDeviceInferRequest::Ptr& inferRequest,
+ const bool needPerfCounters,
+ const MultiDeviceExecutableNetwork::Ptr& multiDeviceExecutableNetwork,
+ const ITaskExecutor::Ptr& callbackExecutor) :
+ AsyncInferRequestThreadSafeDefault(inferRequest, nullptr, callbackExecutor),
+ _multiDeviceExecutableNetwork{multiDeviceExecutableNetwork},
+ _inferRequest{inferRequest},
+ _needPerfCounters{needPerfCounters} {
+ // Executor that runs a pipeline stage by delegating the task to the chosen
+ // worker request and starting its async inference.
+ struct ThisRequestExecutor : public ITaskExecutor {
+ explicit ThisRequestExecutor(MultiDeviceAsyncInferRequest* _this_) : _this{_this_} {}
+ void run(Task task) override {
+ auto workerInferRequest = _this->_workerInferRequest;
+ workerInferRequest->_task = std::move(task);
+ workerInferRequest->_inferRequest.StartAsync();
+ };
+ MultiDeviceAsyncInferRequest* _this = nullptr;
+ };
+ _pipeline = {
+ {_multiDeviceExecutableNetwork, [this] {
+ // Stage 1: bind to the worker picked by the scheduler for this thread.
+ _workerInferRequest = MultiDeviceExecutableNetwork::_thisWorkerInferRequest;
+ _inferRequest->SetBlobsToAnotherRequest(_workerInferRequest->_inferRequest);
+ }},
+ {std::make_shared<ThisRequestExecutor>(this), [this] {
+ // Stage 2 continuation: propagate any failure from the worker request.
+ auto status = _workerInferRequest->_status;
+ if (InferenceEngine::StatusCode::OK != status) {
+ if (nullptr != InferenceEngine::CurrentException()) {
+ std::rethrow_exception(InferenceEngine::CurrentException());
+ } else {
+ THROW_IE_EXCEPTION << InferenceEngine::details::as_status << status;
+ }
+ }
+ if (_needPerfCounters) {
+ _perfMap = _workerInferRequest->_inferRequest.GetPerformanceCounts();
+ }
+ }}
+ };
+}
+
+// Synchronous Infer is implemented on top of the async pipeline.
+void MultiDeviceAsyncInferRequest::Infer_ThreadUnsafe() {
+ InferUsingAsync();
+}
+
+// Returns the perf counters collected by the completion stage of the pipeline.
+// NOTE(review): std::move on a member inside a const method degrades to a copy
+// unless _perfMap is declared mutable — confirm which behavior is intended.
+void MultiDeviceAsyncInferRequest::GetPerformanceCounts_ThreadUnsafe(std::map<std::string, InferenceEngineProfileInfo> &perfMap) const {
+ perfMap = std::move(_perfMap);
+}
+
+// Ensure the async pipeline is drained before members are destroyed.
+MultiDeviceAsyncInferRequest::~MultiDeviceAsyncInferRequest() {
+ StopAndWait();
+}
+
+// ------------------------------MultiDeviceExecutableNetwork----------------------------
+
+thread_local MultiDeviceExecutableNetwork::WorkerInferRequest* MultiDeviceExecutableNetwork::_thisWorkerInferRequest = nullptr;
+
+// RAII helper: unless Release()d, returns the worker request to the
+// not-busy queue on scope exit, so a worker is never leaked as "busy"
+// on an early return or exception path.
+struct IdleGuard {
+ explicit IdleGuard(MultiDeviceExecutableNetwork::WorkerInferRequest* workerInferRequestPtr,
+ MultiDeviceExecutableNetwork::NotBusyWorkerRequests& notBusyWorkerRequests) :
+ _workerInferRequestPtr{workerInferRequestPtr},
+ _notBusyWorkerRequests{&notBusyWorkerRequests} {
+ }
+ ~IdleGuard() {
+ if (nullptr != _notBusyWorkerRequests) {
+ _notBusyWorkerRequests->push(_workerInferRequestPtr);
+ }
+ }
+ // Disarms the guard; returns the queue so the caller can push explicitly.
+ MultiDeviceExecutableNetwork::NotBusyWorkerRequests* Release() {
+ auto notBusyWorkerRequests = _notBusyWorkerRequests;
+ _notBusyWorkerRequests = nullptr;
+ return notBusyWorkerRequests;
+ }
+ MultiDeviceExecutableNetwork::WorkerInferRequest* _workerInferRequestPtr = nullptr;
+ MultiDeviceExecutableNetwork::NotBusyWorkerRequests* _notBusyWorkerRequests = nullptr;
+};
+
+// Creates per-device pools of worker infer requests. Pool size comes from the
+// user-configured per-device request count, falling back to the device's
+// OPTIMAL_NUMBER_OF_INFER_REQUESTS metric. Each worker's completion callback
+// runs the pending pipeline task, re-queues the worker as idle, and schedules
+// the next waiting task.
+MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const DeviceMap<InferenceEngine::ExecutableNetwork>& networksPerDevice,
+ const DeviceMap<DeviceInformation>& networkDevices,
+ const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
+ const bool needPerfCounters) :
+ InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, std::make_shared<InferenceEngine::ImmediateExecutor>()),
+ _devicePriorities{networkDevices},
+ _networksPerDevice{networksPerDevice},
+ _config{config},
+ _needPerfCounters{needPerfCounters} {
+ // This network acts as its own task executor (see run()); drop the default.
+ _taskExecutor.reset();
+ for (auto&& networkValue : _networksPerDevice) {
+ auto& device = networkValue.first;
+ auto& network = networkValue.second;
+
+ auto itNumRequests = _devicePriorities.find(device);
+ unsigned int optimalNum = 0;
+ try {
+ optimalNum = network.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
+ } catch (const details::InferenceEngineException &iie) {
+ THROW_IE_EXCEPTION
+ << "Every device used with the Multi-Device should "
+ << "support OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. "
+ << "Failed to query the metric for the " << device << " with error:" << iie.what();
+ }
+ // -1 means "not set by the user": fall back to the device's optimal number.
+ const auto numRequests = (_devicePriorities.end() == itNumRequests ||
+ itNumRequests->second.numRequestsPerDevices == -1) ? optimalNum : itNumRequests->second.numRequestsPerDevices;
+ auto& workerRequests = _workerRequests[device];
+ auto& idleWorkerRequests = _idleWorkerRequests[device];
+ workerRequests.resize(numRequests);
+ auto* idleWorkerRequestsPtr = &(idleWorkerRequests);
+ for (auto&& workerRequest : workerRequests) {
+ workerRequest._inferRequest = network.CreateInferRequest();
+ auto* workerRequestPtr = &workerRequest;
+ idleWorkerRequests.push(workerRequestPtr);
+ workerRequest._inferRequest.SetCompletionCallback<std::function<void(InferRequest, StatusCode)>>(
+ [workerRequestPtr, this, device, idleWorkerRequestsPtr] (InferRequest , StatusCode status) mutable {
+ // Guard re-queues the worker even if the task below throws.
+ IdleGuard idleGuard{workerRequestPtr, *idleWorkerRequestsPtr};
+ workerRequestPtr->_status = status;
+ {
+ // Move the task out before running so the slot is free for reuse.
+ auto capturedTask = std::move(workerRequestPtr->_task);
+ capturedTask();
+ }
+ if (!_terminate) {
+ idleGuard.Release()->push(workerRequestPtr);
+ ScheduleToWorkerInferRequest();
+ }
+ });
+ }
+ }
+}
+
+// Tries to start one pending pipeline task on an idle worker, scanning devices in priority order.
+void MultiDeviceExecutableNetwork::ScheduleToWorkerInferRequest() {
+    // snapshot the priorities under the lock; the map may be changed concurrently via SetConfig
+    auto devices = [&] {
+        std::lock_guard<std::mutex> lock(_mutex);
+        return _devicePriorities;
+    }();
+    for (auto&& device : devices) {
+        auto& idleWorkerRequests = _idleWorkerRequests[device.first];
+        WorkerInferRequest* workerRequestPtr = nullptr;
+        if (idleWorkerRequests.try_pop(workerRequestPtr)) {
+            // if no task turns out to be available, the guard pushes the popped worker back to idle
+            IdleGuard idleGuard{workerRequestPtr, idleWorkerRequests};
+            Task inferPipelineTask;
+            if (_inferPipelineTasks.try_pop(inferPipelineTask)) {
+                // thread-local pointer lets the running task find "its" worker request
+                _thisWorkerInferRequest = workerRequestPtr;
+                inferPipelineTask();
+                // the worker is busy now; keep it out of the idle queue
+                idleGuard.Release();
+                break;
+            }
+        }
+    }
+}
+
+// ITaskExecutor entry point: queue the pipeline task and try to dispatch it right away.
+void MultiDeviceExecutableNetwork::run(Task inferPipelineTask) {
+    if (_terminate) {
+        return;  // the network is shutting down - silently drop new work
+    }
+    _inferPipelineTasks.push(std::move(inferPipelineTask));
+    ScheduleToWorkerInferRequest();
+}
+
+MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() {
+    {
+        // clearing the priorities stops ScheduleToWorkerInferRequest from dispatching further tasks
+        std::lock_guard<std::mutex> lock(_mutex);
+        _devicePriorities.clear();
+    }
+    _terminate = true;
+    /* NOTE: The only threads that use `MultiDeviceExecutableNetwork` Context are those that are used by Worker infer requests.
+     * But the AsyncInferRequest destructor should wait for all asynchronous tasks that are used by the request
+     */
+    _workerRequests.clear();
+}
+
+// Builds the device-agnostic synchronous request; the actual device is selected at schedule time.
+InferenceEngine::InferRequestInternal::Ptr MultiDeviceExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
+                                                                                                InferenceEngine::OutputsDataMap networkOutputs) {
+    auto syncRequest = std::make_shared<MultiDeviceInferRequest>(networkInputs, networkOutputs);
+    return syncRequest;
+}
+
+// Wraps the synchronous Multi-Device request into the asynchronous, thread-safe public request object.
+void MultiDeviceExecutableNetwork::CreateInferRequest(IInferRequest::Ptr& asyncRequest) {
+    auto syncRequestImpl = CreateInferRequestImpl(_networkInputs, _networkOutputs);
+    syncRequestImpl->setPointerToExecutableNetworkInternal(shared_from_this());
+    auto multiSyncRequest = std::static_pointer_cast<MultiDeviceInferRequest>(syncRequestImpl);
+    auto multiNetwork = std::static_pointer_cast<MultiDeviceExecutableNetwork>(shared_from_this());
+    auto asyncThreadSafeImpl = std::make_shared<MultiDeviceAsyncInferRequest>(multiSyncRequest,
+                                                                              _needPerfCounters,
+                                                                              multiNetwork,
+                                                                              _callbackExecutor);
+    asyncRequest.reset(new InferRequestBase<MultiDeviceAsyncInferRequest>(asyncThreadSafeImpl), [](IInferRequest *p) { p->Release(); });
+    asyncThreadSafeImpl->SetPointerToPublicInterface(asyncRequest);
+}
+
+// At runtime only the device priorities may be changed, and only for devices the network
+// was originally loaded with; request counts are fixed at LoadNetwork time.
+void MultiDeviceExecutableNetwork::SetConfig(const std::map<std::string, InferenceEngine::Parameter> &config,
+        InferenceEngine::ResponseDesc * /* resp */) {
+    auto priorities = config.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
+    if (priorities == config.end() || config.size() > 1) {
+        THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str <<
+            "The only config supported for the Network's SetConfig is MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES";
+    } else {
+        auto multiPlugin = std::dynamic_pointer_cast<MultiDeviceInferencePlugin>(this->_plugin);
+        assert(multiPlugin != nullptr);
+        auto metaDevices = multiPlugin->ParseMetaDevices(priorities->second, {});
+
+        // reject explicit per-device request counts: the worker pools are already created
+        if (std::any_of(metaDevices.begin(), metaDevices.end(), [](const std::pair<DeviceName, DeviceInformation> & kvp) {
+            return kvp.second.numRequestsPerDevices != -1;
+        })) {
+            THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str << "You can only change device priorities but not number of requests"
+                <<" with the Network's SetConfig(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES!";
+        }
+
+        {
+            // the lock also guards ScheduleToWorkerInferRequest, which snapshots _devicePriorities
+            std::lock_guard<std::mutex> lock{_mutex};
+            for (auto && device : metaDevices) {
+                if (_devicePriorities.find(device.first) == _devicePriorities.end()) {
+                    THROW_IE_EXCEPTION << NOT_FOUND_str << "You can only change device priorities but not add new devices with"
+                        << " the Network's SetConfig(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES." << device.first <<
+                        " device was not in the original device list!";
+                }
+            }
+            _devicePriorities = metaDevices;
+
+            // update value in config
+            _config[MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = priorities->second;
+        }
+    }
+}
+
+// Looks up a single config value that was recorded when the network was loaded.
+void MultiDeviceExecutableNetwork::GetConfig(const std::string &name, InferenceEngine::Parameter &result,
+        InferenceEngine::ResponseDesc * /* resp */) const {
+    const auto it = _config.find(name);
+    if (it == _config.end()) {
+        THROW_IE_EXCEPTION << NOT_FOUND_str << name <<" not found in the ExecutableNetwork config";
+    }
+    result = it->second;
+}
+
+// Reports executable-network-level metrics aggregated across all underlying devices.
+void MultiDeviceExecutableNetwork::GetMetric(const std::string &name, Parameter &result, ResponseDesc *resp) const {
+    if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
+        // MULTI can keep every device busy at once, so its optimum is the sum over devices
+        unsigned int res = 0u;
+        for (auto n : _networksPerDevice) {
+            try {
+                res += n.second.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
+            } catch (const details::InferenceEngineException &iie) {
+                THROW_IE_EXCEPTION
+                        << "Every device used with the Multi-Device should "
+                        << "support OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. "
+                        << "Failed to query the metric for the " << n.first << " with error:" << iie.what();
+            }
+        }
+        result = IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, res);
+    } else if (name == METRIC_KEY(NETWORK_NAME)) {
+        // all per-device networks came from the same model, so any one of them supplies the name
+        auto it = _networksPerDevice.begin();
+        IE_ASSERT(it != _networksPerDevice.end());
+        result = IE_SET_METRIC(NETWORK_NAME, it->second.GetMetric(
+            METRIC_KEY(NETWORK_NAME)).as<std::string>());
+    } else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
+        result = IE_SET_METRIC(SUPPORTED_METRICS, {
+            METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS),
+            METRIC_KEY(SUPPORTED_METRICS),
+            METRIC_KEY(NETWORK_NAME),
+            METRIC_KEY(SUPPORTED_CONFIG_KEYS)
+        });
+    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
+        std::vector<std::string> configKeys = { MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES };
+        result = IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, configKeys);
+    } else {
+        THROW_IE_EXCEPTION << "Unsupported Network metric: " << name;
+    }
+}
+
+// ------------------------------MultiDeviceInferencePlugin----------------------------
+
+namespace {
+
+// Returns a copy of `config` overlaid with the (higher-priority) `local` settings.
+std::map<std::string, std::string> mergeConfigs(std::map<std::string, std::string> config,
+                                                const std::map<std::string, std::string> & local) {
+    for (const auto & entry : local) {
+        config[entry.first] = entry.second;
+    }
+    return config;
+}
+
+} // namespace
+
+// Filters `config` down to the keys the given device reports via SUPPORTED_CONFIG_KEYS.
+std::map<std::string, std::string> MultiDeviceInferencePlugin::GetSupportedConfig(
+    const std::map<std::string, std::string> & config, const std::string & deviceName) const {
+    std::vector<std::string> supportedConfigKeys = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
+    std::map<std::string, std::string> supportedConfig;
+    for (auto&& key : supportedConfigKeys) {
+        const auto it = config.find(key);
+        if (it != config.end()) {
+            supportedConfig[key] = it->second;
+        }
+    }
+    return supportedConfig;
+}
+
+// Parses a comma-separated priorities string, e.g. "CPU(4),GPU", into per-device
+// configurations. A parenthesized number sets the per-device request count; -1 means "not set".
+DeviceMap<DeviceInformation> MultiDeviceInferencePlugin::ParseMetaDevices(const std::string& priorities,
+                                                                          const std::map<std::string, std::string> & config) const {
+    DeviceMap<DeviceInformation> metaDevices;
+
+    // split the comma-separated priorities string into per-device tokens
+    std::vector<std::string> devicesWithRequests;
+    std::string::size_type i = 0;
+    std::string::size_type idelimeter;
+    while ((idelimeter = priorities.find(',', i)) != std::string::npos) {
+        devicesWithRequests.push_back(priorities.substr(i, idelimeter - i));
+        i = idelimeter + 1;
+    }
+    // last token in the string (which has no comma after that)
+    devicesWithRequests.push_back(priorities.substr(i, priorities.length() - i));
+
+    // collects the device-supported subset of the merged plugin + load-time config for one device
+    auto getDeviceConfig = [&] (const DeviceName & deviceWithID) {
+        DeviceIDParser deviceParser(deviceWithID);
+        std::string deviceName = deviceParser.getDeviceName();
+        std::map<std::string, std::string> tconfig = mergeConfigs(_config, config);
+
+        // set device ID if any
+        std::string deviceIDLocal = deviceParser.getDeviceID();
+        if (!deviceIDLocal.empty()) {
+            tconfig[PluginConfigParams::KEY_DEVICE_ID] = deviceIDLocal;
+        }
+
+        return GetSupportedConfig(tconfig, deviceName);
+    };
+
+    for (auto && d : devicesWithRequests) {
+        auto openingBracket = d.find_first_of('(');
+        auto closingBracket = d.find_first_of(')', openingBracket);
+        auto device_name = d.substr(0, openingBracket);
+
+        int numRequests = -1;
+        if (closingBracket != std::string::npos && openingBracket < closingBracket) {
+            // take exactly the digits between the brackets (the length was previously
+            // `closingBracket - 1`, which only worked because std::stol stops at ')')
+            numRequests = std::stol(d.substr(openingBracket + 1, closingBracket - openingBracket - 1));
+
+            if (numRequests <= 0) {
+                THROW_IE_EXCEPTION << "Priority value for '" << device_name << "' must be > 0, while " << numRequests
+                    << " is passed";
+            }
+        }
+
+        // create meta device
+        metaDevices[device_name] = { getDeviceConfig(device_name), numRequests };
+    }
+
+    return metaDevices;
+}
+
+// The only plugin-level config key exposed for reading is the device priorities list.
+Parameter MultiDeviceInferencePlugin::GetConfig(const std::string& name,
+                                                const std::map<std::string, Parameter> & options) const {
+    if (name != MULTI_CONFIG_KEY(DEVICE_PRIORITIES)) {
+        THROW_IE_EXCEPTION << "Unsupported config key: " << name;
+    }
+    const auto it = _config.find(MULTI_CONFIG_KEY(DEVICE_PRIORITIES));
+    if (it == _config.end()) {
+        THROW_IE_EXCEPTION << "Value for KEY_MULTI_DEVICE_PRIORITIES is not set";
+    }
+    return { it->second };
+}
+
+// Stores all given keys verbatim; validation happens when the values are actually consumed.
+void MultiDeviceInferencePlugin::SetConfig(const std::map<std::string, std::string> & config) {
+    for (const auto & entry : config) {
+        _config[entry.first] = entry.second;
+    }
+}
+
+IE_SUPPRESS_DEPRECATED_START
+
+// Legacy (deprecated) C entry point the Core uses to instantiate the plugin: wraps the MULTI
+// plugin into the IE-compatible shell, reporting the API version and build number.
+INFERENCE_PLUGIN_API(InferenceEngine::StatusCode) CreatePluginEngine(
+    InferenceEngine::IInferencePlugin *&plugin,
+    InferenceEngine::ResponseDesc *resp) noexcept {
+    try {
+        plugin = make_ie_compatible_plugin(
+            {{2, 1},
+             CI_BUILD_NUMBER,
+             "MultiDevicePlugin"}, std::make_shared<MultiDeviceInferencePlugin>());
+        return OK;
+    }
+    catch (std::exception &ex) {
+        // noexcept boundary: translate any exception into a status code + description buffer
+        return DescriptionBuffer(GENERAL_ERROR, resp) << ex.what();
+    }
+}
+
+IE_SUPPRESS_DEPRECATED_END
+
+MultiDeviceInferencePlugin::MultiDeviceInferencePlugin() {
+    // the device name under which this plugin is addressed via the Core, e.g. "MULTI:CPU,GPU"
+    _pluginName = "MULTI";
+}
+
+// Reports plugin-level (network-agnostic) metrics for the MULTI device.
+InferenceEngine::Parameter MultiDeviceInferencePlugin::GetMetric(const std::string& name,
+                                         const std::map<std::string, InferenceEngine::Parameter> & options) const {
+    if (name == METRIC_KEY(SUPPORTED_METRICS)) {
+        std::vector<std::string> metrics;
+        metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
+        metrics.push_back(METRIC_KEY(FULL_DEVICE_NAME));
+        metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
+        IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
+    } else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
+        // renamed from `name`, which shadowed the function parameter of the same name
+        std::string fullDeviceName = { "MULTI" };
+        IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, fullDeviceName);
+    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
+        std::vector<std::string> configKeys = { MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES };
+        IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
+    } else {
+        THROW_IE_EXCEPTION << "Unsupported metric key " << name;
+    }
+}
+
+// Loads the (cloned) network to every device listed in KEY_MULTI_DEVICE_PRIORITIES and wraps
+// the per-device executable networks into a single MultiDeviceExecutableNetwork.
+ExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadExeNetworkImpl(const ICNNNetwork &network,
+                                                                              const std::map<std::string, std::string>& config) {
+    if (GetCore() == nullptr) {
+        // fixed typo in the user-facing message ("InferencEngine" -> "InferenceEngine")
+        THROW_IE_EXCEPTION << "Please, work with MULTI device via InferenceEngine::Core object";
+    }
+
+    // TODO: do we really need a clone?
+    ICNNNetwork::Ptr clonedNetwork = cloneNet(network);
+
+    auto fullConfig = mergeConfigs(_config, config);
+    auto priorities = fullConfig.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
+    if (priorities == fullConfig.end()) {
+        THROW_IE_EXCEPTION << "KEY_MULTI_DEVICE_PRIORITIES key is not set for MULTI device";
+    }
+
+    DeviceMap<DeviceInformation> metaDevices = ParseMetaDevices(priorities->second, fullConfig);
+
+    // collect the settings that are applicable to the devices we are loading the network to
+    std::unordered_map<std::string, InferenceEngine::Parameter> multiNetworkConfig;
+    multiNetworkConfig.insert(*priorities);
+
+    DeviceMap<ExecutableNetwork> executableNetworkPerDevice;
+    for (auto& p : metaDevices) {
+        auto & deviceName = p.first;
+        auto & metaDevice = p.second;
+        auto & deviceConfig = metaDevice.config;
+        executableNetworkPerDevice.insert({ deviceName, GetCore()->LoadNetwork(CNNNetwork{clonedNetwork}, deviceName, deviceConfig) });
+        multiNetworkConfig.insert(deviceConfig.begin(), deviceConfig.end());
+    }
+    if (executableNetworkPerDevice.empty())
+        THROW_IE_EXCEPTION << NOT_FOUND_str << "Failed to load Executable network to any device "
+                                            <<  "that the MULTI device is initialized to work with";
+
+    // PERF_COUNT must be enabled at load time for the workers to collect per-layer timings
+    auto perfConfig = fullConfig.find(PluginConfigParams::KEY_PERF_COUNT);
+    bool enablePerfCounters = (fullConfig.end() != perfConfig) && (perfConfig->second == PluginConfigParams::YES);
+
+    return std::make_shared<MultiDeviceExecutableNetwork>(executableNetworkPerDevice,
+                                                          metaDevices,
+                                                          multiNetworkConfig,
+                                                          enablePerfCounters);
+}
+
+// Reports which layers of `network` MULTI can handle: a layer is marked as supported
+// only if EVERY device from KEY_MULTI_DEVICE_PRIORITIES reports it as supported.
+void MultiDeviceInferencePlugin::QueryNetwork(const ICNNNetwork& network,
+                                              const std::map<std::string, std::string>& config,
+                                              QueryNetworkResult& queryResult) const {
+    if (GetCore() == nullptr) {
+        // fixed typo in the user-facing message ("InferencEngine" -> "InferenceEngine")
+        THROW_IE_EXCEPTION << "Please, work with MULTI device via InferenceEngine::Core object";
+    }
+
+    queryResult.rc = StatusCode::OK;
+    queryResult.supportedLayersMap.clear();
+
+    auto fullConfig = mergeConfigs(_config, config);
+    auto priorities = fullConfig.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
+    if (priorities == fullConfig.end()) {
+        THROW_IE_EXCEPTION << "KEY_MULTI_DEVICE_PRIORITIES key is not set for MULTI device";
+    }
+
+    DeviceMap<DeviceInformation> metaDevices = ParseMetaDevices(priorities->second, fullConfig);
+    std::map<std::string, QueryNetworkResult> queryResults;
+
+    // query every configured device once
+    for (auto&& value : metaDevices) {
+        auto& deviceName = value.first;
+        auto& metaDevice = value.second;
+        queryResults[deviceName] = GetCore()->QueryNetwork(network, deviceName, metaDevice.config);
+    }
+
+    // intersect the per-device results: keep only layers every device supports
+    details::CNNNetworkIterator i(&network);
+    while (i != details::CNNNetworkIterator()) {
+        CNNLayer::Ptr layer = *i;
+        bool layerIsInQueryResultsForAllDevices = std::all_of(std::begin(queryResults), std::end(queryResults),
+            [&](const std::map<std::string, QueryNetworkResult>::value_type& qr) {
+                return qr.second.supportedLayersMap.end() != qr.second.supportedLayersMap.find(layer->name);});
+        if (layerIsInQueryResultsForAllDevices) {
+            queryResult.supportedLayersMap[layer->name] = GetName();
+        }
+        i++;
+    }
+}
+} // namespace MultiDevicePlugin
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include <atomic>
+#include <mutex>
+#include <queue>
+#include <unordered_map>
+#include <map>
+#include <vector>
+#include <utility>
+#include <memory>
+#include <string>
+
+#include <cpp/ie_plugin_cpp.hpp>
+#include <ie_plugin_dispatcher.hpp>
+#include <cpp_interfaces/impl/ie_plugin_internal.hpp>
+#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
+#include <cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp>
+#include "ie_iinfer_request.hpp"
+#include "details/ie_exception_conversion.hpp"
+#include <ie_parallel.hpp>
+
+#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
+#include <tbb/concurrent_queue.h>
+#endif
+
+namespace MultiDevicePlugin {
+
+using DeviceName = std::string;
+
+// Per-device slice of the MULTI configuration.
+struct DeviceInformation {
+    std::map<std::string, std::string> config;  // device-specific config subset to load the network with
+    int numRequestsPerDevices;                  // user-requested worker count; -1 means "not set, use the device optimum"
+};
+
+template<typename T>
+using DeviceMap = std::unordered_map<DeviceName, T>;
+
+// Device-agnostic synchronous infer request: it only holds the blobs; the actual inference
+// is performed by a per-device worker request the blobs are forwarded to at schedule time.
+class MultiDeviceInferRequest : public InferenceEngine::InferRequestInternal {
+public:
+    using Ptr = std::shared_ptr<MultiDeviceInferRequest>;
+    explicit MultiDeviceInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
+                                     const InferenceEngine::OutputsDataMap& networkOutputs);
+    // Perf counts come from the device worker (via the async request), not from this holder object
+    void GetPerformanceCounts(std::map<std::string, InferenceEngineProfileInfo>&) const override {
+        THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
+    }
+    // Direct synchronous inference is not supported; work is always dispatched asynchronously
+    void InferImpl() override {
+        THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
+    }
+    // Multi-Device impl specific: sets the data (blobs from the device-less requests to the specific device request)
+    void SetBlobsToAnotherRequest(InferenceEngine::InferRequest& req);
+};
+
+#if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
+// With TBB available, reuse its concurrent queue directly
+template <typename T>
+using ThreadSafeQueue = tbb::concurrent_queue<T>;
+#else
+// Minimal mutex-based fallback implementing the subset of the tbb::concurrent_queue API used here
+template <typename T>
+class ThreadSafeQueue {
+public:
+    void push(T value) {
+        std::lock_guard<std::mutex> lock(_mutex);
+        _queue.push(std::move(value));
+    }
+
+    // Non-blocking pop: returns false (leaving `value` untouched) when the queue is empty
+    bool try_pop(T& value) {
+        std::lock_guard<std::mutex> lock(_mutex);
+        if (!_queue.empty()) {
+            value = std::move(_queue.front());
+            _queue.pop();
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    bool empty() {
+        std::lock_guard<std::mutex> lock(_mutex);
+        return _queue.empty();
+    }
+
+protected:
+    std::queue<T> _queue;
+    std::mutex _mutex;
+};
+#endif
+
+// Executable network that load-balances infer requests over several per-device networks.
+// It is also an ITaskExecutor: async requests submit their pipeline stages via run().
+class MultiDeviceExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault,
+                                     public ITaskExecutor {
+public:
+    using Ptr = std::shared_ptr<MultiDeviceExecutableNetwork>;
+    // One slot of the per-device worker pool: a real device request plus its pending task/status
+    struct WorkerInferRequest {
+        InferenceEngine::InferRequest _inferRequest;
+        Task _task;
+        InferenceEngine::StatusCode _status = InferenceEngine::StatusCode::OK;
+    };
+    using NotBusyWorkerRequests = ThreadSafeQueue<WorkerInferRequest*>;
+
+    explicit MultiDeviceExecutableNetwork(const DeviceMap<InferenceEngine::ExecutableNetwork>& networksPerDevice,
+                                          const DeviceMap<DeviceInformation>& networkDevices,
+                                          const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
+                                          const bool needPerfCounters = false);
+
+    void SetConfig(const std::map<std::string, InferenceEngine::Parameter> &config, InferenceEngine::ResponseDesc *resp) override;
+    void GetConfig(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const override;
+    void GetMetric(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const override;
+    // ITaskExecutor entry point: queues the task and tries to dispatch it to an idle worker
+    void run(Task inferTask) override;
+    void CreateInferRequest(InferenceEngine::IInferRequest::Ptr& asyncRequest) override;
+    InferenceEngine::InferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
+                                                                      InferenceEngine::OutputsDataMap networkOutputs) override;
+    ~MultiDeviceExecutableNetwork() override;
+
+    void ScheduleToWorkerInferRequest();
+
+    static thread_local WorkerInferRequest* _thisWorkerInferRequest;  // the worker running the current task (set by the scheduler)
+    std::atomic_bool _terminate = {false};  // set in the destructor to stop further scheduling
+    std::mutex _mutex;  // guards _devicePriorities
+    DeviceMap<DeviceInformation> _devicePriorities;
+    DeviceMap<InferenceEngine::ExecutableNetwork> _networksPerDevice;
+    ThreadSafeQueue<Task> _inferPipelineTasks;  // tasks waiting for an idle worker
+    DeviceMap<NotBusyWorkerRequests> _idleWorkerRequests;
+    DeviceMap<std::vector<WorkerInferRequest>> _workerRequests;
+    std::unordered_map<std::string, InferenceEngine::Parameter> _config;
+    bool _needPerfCounters = false;
+};
+
+// Asynchronous wrapper over MultiDeviceInferRequest: forwards the blobs to the scheduled
+// device worker and exposes its results and (optionally) performance counters.
+class MultiDeviceAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
+public:
+    using Ptr = std::shared_ptr<MultiDeviceAsyncInferRequest>;
+
+    explicit MultiDeviceAsyncInferRequest(const MultiDeviceInferRequest::Ptr& inferRequest,
+                                          const bool needPerfCounters,
+                                          const MultiDeviceExecutableNetwork::Ptr& multiDeviceExecutableNetwork,
+                                          const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
+    void Infer_ThreadUnsafe() override;
+    void GetPerformanceCounts_ThreadUnsafe(std::map<std::string, InferenceEngineProfileInfo> &_perfMap) const override;
+    ~MultiDeviceAsyncInferRequest() override;
+
+protected:
+    MultiDeviceExecutableNetwork::Ptr _multiDeviceExecutableNetwork;
+    MultiDeviceInferRequest::Ptr _inferRequest;
+    // NOTE(review): presumably filled from the worker request on completion — definition not in view, confirm
+    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> _perfMap;
+    bool _needPerfCounters = false;
+    MultiDeviceExecutableNetwork::WorkerInferRequest* _workerInferRequest = nullptr;
+};
+
+// The MULTI plugin itself: parses the device priorities config, loads the network to each
+// listed device, and returns a MultiDeviceExecutableNetwork over the per-device results.
+class MultiDeviceInferencePlugin : public InferenceEngine::InferencePluginInternal {
+public:
+    MultiDeviceInferencePlugin();
+    ~MultiDeviceInferencePlugin() override = default;
+
+    InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork& network,
+                                                                       const std::map<std::string, std::string>& config) override;
+
+    void SetConfig(const std::map<std::string, std::string>& config) override;
+    Parameter GetConfig(const std::string& name,
+                        const std::map<std::string, Parameter> & options) const override;
+    void QueryNetwork(const InferenceEngine::ICNNNetwork& network,
+                      const std::map<std::string, std::string>& config,
+                      InferenceEngine::QueryNetworkResult& res) const override;
+    InferenceEngine::Parameter GetMetric(const std::string& name,
+                                         const std::map<std::string, InferenceEngine::Parameter>& options) const override;
+
+    // Splits a "DEV1(n),DEV2"-style priorities string into per-device configs; -1 request count = unset
+    DeviceMap<DeviceInformation> ParseMetaDevices(const std::string & devicesRequestsCfg,
+                                                  const std::map<std::string, std::string> & config) const;
+
+protected:
+    // Filters `config` down to the keys the device reports via SUPPORTED_CONFIG_KEYS
+    std::map<std::string, std::string> GetSupportedConfig(const std::map<std::string, std::string>& config,
+                                                          const DeviceName & deviceName) const;
+};
+
+} // namespace MultiDevicePlugin
_core = core;
}
- const ICore* GetCore() const noexcept override {
+ ICore* GetCore() const noexcept override {
return _core;
}
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
}
-
RemoteContext::Ptr GetDefaultContext() override {
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
}
* @param config string-string map of config parameters relevant only for this load operation
* @return Shared pointer to the ExecutableNetwork object
*/
- virtual ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const ICore* core, const ICNNNetwork& network,
+ virtual ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const ICNNNetwork& network,
const std::map<std::string, std::string>& config) = 0;
/**
* @note The function is used in
* InferencePluginInternal::LoadNetwork(const ICNNNetwork&, const std::map<std::string, std::string>&, RemoteContext::Ptr)
* which performs common steps first and calls this plugin-dependent method implementation after.
- * @param core A pointer to ICore interface.
* @param network A network object
* @param context A remote context
* @param config string-string map of config parameters relevant only for this load operation
* @return Shared pointer to the ExecutableNetwork object
*/
- virtual ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const ICore* core, const ICNNNetwork& network,
- RemoteContext::Ptr context,
+ virtual ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const ICNNNetwork& network, RemoteContext::Ptr context,
const std::map<std::string, std::string>& config) {
- (void)core;
(void)network;
(void)context;
(void)config;
void cloneAndCreateExecutableNetwork(IExecutableNetwork::Ptr& executableNetwork, const ICNNNetwork& network,
const std::map<std::string, std::string>& config,
RemoteContext::Ptr context = nullptr) {
- InputsDataMap networkInputs;
- OutputsDataMap networkOutputs;
+ InputsDataMap networkInputs, networkInputsCloned;
+ OutputsDataMap networkOutputs, networkOutputsCloned;
network.getInputsInfo(networkInputs);
network.getOutputsInfo(networkOutputs);
- copyInputOutputInfo(networkInputs, networkOutputs, _networkInputs, _networkOutputs);
+ copyInputOutputInfo(networkInputs, networkOutputs, networkInputsCloned, networkOutputsCloned);
ExecutableNetworkInternal::Ptr impl;
if (nullptr == context) {
- impl = LoadExeNetworkImpl(GetCore(), network, config);
+ impl = LoadExeNetworkImpl(network, config);
} else {
- impl = LoadExeNetworkImpl(GetCore(), network, context, config);
+ impl = LoadExeNetworkImpl(network, context, config);
}
- impl->setNetworkInputs(_networkInputs);
- impl->setNetworkOutputs(_networkOutputs);
+ impl->setNetworkInputs(networkInputsCloned);
+ impl->setNetworkOutputs(networkOutputsCloned);
impl->SetPointerToPluginInternal(shared_from_this());
executableNetwork.reset(new ExecutableNetworkBase<ExecutableNetworkInternal>(impl), [](details::IRelease* p) {
}
std::string _pluginName; //!< A device name that plugins enables
- InferenceEngine::InputsDataMap _networkInputs; //!< Holds information about network inputs info
- InferenceEngine::OutputsDataMap _networkOutputs; //!< Holds information about network outputs data
std::map<std::string, std::string> _config; //!< A map config keys -> values
ICore* _core = nullptr; //!< A pointer to ICore interface
};
* @brief Gets reference to ICore interface
* @return Reference to ICore interface
*/
- virtual const ICore* GetCore() const noexcept = 0;
+ virtual ICore* GetCore() const noexcept = 0;
/**
* @brief Queries a plugin about support layers in network
virtual std::shared_ptr<ITaskExecutor> GetTaskExecutor() const = 0;
/**
+ * @deprecated Use ICore::GetMetric, ICore::LoadNetwork, ICore::QueryNetwork instead
* @brief Returns reference to plugin by a device name
* @param deviceName - a name of device
* @return Reference to plugin
virtual CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath) const = 0;
/**
+ * @brief Creates an executable network from a network object.
+ *
+ * Users can create as many networks as they need and use
+ * them simultaneously (up to the limitation of the hardware resources)
+ *
+ * @param network CNNNetwork object acquired from Core::ReadNetwork
+ * @param deviceName Name of device to load network to
+ * @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load
+ * operation
+ * @return An executable network reference
+ */
+ virtual ExecutableNetwork LoadNetwork(const CNNNetwork& network, const std::string& deviceName,
+ const std::map<std::string, std::string>& config = {}) = 0;
+
+ /**
+ * @brief Creates an executable network from a previously exported network
+ * @param deviceName Name of device load executable network on
+ * @param networkModel network model stream
+ * @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load
+ * operation
+ * @return An executable network reference
+ */
+ virtual ExecutableNetwork ImportNetwork(std::istream& networkModel, const std::string& deviceName = {},
+ const std::map<std::string, std::string>& config = {}) = 0;
+
+ /**
+ * @brief Query device if it supports specified network with specified configuration
+ *
+ * @param deviceName A name of a device to query
+ * @param network Network object to query
+ * @param config Optional map of pairs: (config parameter name, config parameter value)
+ * @return An object containing a map of pairs a layer name -> a device name supporting this layer.
+ */
+ virtual QueryNetworkResult QueryNetwork(const ICNNNetwork& network, const std::string& deviceName,
+ const std::map<std::string, std::string>& config) const = 0;
+
+ /**
+ * @brief Gets general runtime metric for dedicated hardware.
+ *
+ * The method is needed to request common device properties
+ * which are executable network agnostic. It can be device name, temperature, other devices-specific values.
+ *
+ * @param deviceName - A name of a device to get a metric value.
+ * @param name - metric name to request.
+ * @return Metric value corresponding to metric key.
+ */
+ virtual Parameter GetMetric(const std::string& deviceName, const std::string& name) const = 0;
+
+ /**
* @brief Default virtual destructor
*/
virtual ~ICore() = default;
#define IE_STR(x) IE_STR_(x)
#define IE_STR_(x) #x
-class ProfilingTask;
+struct ProfilingTask;
struct IttStatic {};
}
private:
-friend void annotateBegin(IttStatic&, IttProfilingTask& t);
-friend void annotateEnd(IttStatic&, IttProfilingTask& t);
+ friend void annotateBegin(IttStatic&, IttProfilingTask& t);
+ friend void annotateEnd(IttStatic&, IttProfilingTask& t);
std::string name;
#ifdef ENABLE_PROFILING_ITT
void run(Task task) override;
+ void Execute(Task task) override;
+
int GetStreamId() override;
int GetNumaNodeId() override;
* @return `ID` of current NUMA Node, or throws exceptions if called not from stream thread
*/
virtual int GetNumaNodeId() = 0;
+
+ /**
+ * @brief Execute the task in the current thread using streams executor configuration and constraints
+ * @param task A task to start
+ */
+ virtual void Execute(Task task) = 0;
};
#
function(ie_avx512_core_optimization_flags flags)
if(WIN32)
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "/QxCORE-AVX512" PARENT_SCOPE)
- elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(${flags} "/arch:AVX512" PARENT_SCOPE)
else()
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
endif()
else()
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "-xCORE-AVX512" PARENT_SCOPE)
else()
set(${flags} "-mavx512f -mavx512bw -mavx512dq -mfma" PARENT_SCOPE)
// #include <transformations/transformations_tbl.hpp>
// #undef NGRAPH_PASS
-NGRAPH_PASS(NopElimination, ::ngraph::pass)
+// This pass must be called first in pipeline
+NGRAPH_PASS(InitNodeInfo, ::ngraph::pass)
+NGRAPH_PASS(ConvertPriorBox, ::ngraph::pass) // WA: ConvertPriorBox must be executed before CF
+NGRAPH_PASS(ConstantFolding, ::ngraph::pass)
+NGRAPH_PASS(RemoveFilteringBoxesBySize, ::ngraph::pass) // Resolves dynamism (replaces NonZero), CF needed
+NGRAPH_PASS(ConstantFolding, ::ngraph::pass)
+NGRAPH_PASS(StridedSliceOptimization, ::ngraph::pass) // depends on CF
+NGRAPH_PASS(NopElimination, ::ngraph::pass) // may introduce fake dynamism
+NGRAPH_PASS(AlgebraicSimplification, ::ngraph::pass) // may introduce fake dynamism
+NGRAPH_PASS(ConstantFolding, ::ngraph::pass)
+NGRAPH_PASS(ConvertScatterElementsToScatter, ::ngraph::pass) // partially depends on CF
// #include <transformations/transformations_tbl.hpp>
// #undef NGRAPH_PASS
-NGRAPH_PASS(InitNodeInfo, ::ngraph::pass)
-NGRAPH_PASS(ConvertPriorBox, ::ngraph::pass)
NGRAPH_PASS(ConstantFolding, ::ngraph::pass)
NGRAPH_PASS(ConvertReduceToPooling, ::ngraph::pass)
NGRAPH_PASS(ConvertMod, ::ngraph::pass)
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+#include "transformations/utils/pass_param.hpp"
+
+namespace ngraph {
+namespace pass {
+
+ class INFERENCE_ENGINE_API_CLASS(ConvertBroadcast3);
+
+} // namespace pass
+} // namespace ngraph
+
+class ngraph::pass::ConvertBroadcast3: public ngraph::pass::GraphRewrite, public ngraph::pass::PassParam {
+public:
+ ConvertBroadcast3() : GraphRewrite() {
+ convert_broadcast3();
+ }
+
+private:
+ void convert_broadcast3();
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+#include "transformations/utils/pass_param.hpp"
+
+namespace ngraph {
+namespace pass {
+
+ class INFERENCE_ENGINE_API_CLASS(ConvertNMS3);
+
+} // namespace pass
+} // namespace ngraph
+
+class ngraph::pass::ConvertNMS3: public ngraph::pass::GraphRewrite, public ngraph::pass::PassParam {
+public:
+ ConvertNMS3() : GraphRewrite() {
+ convert_nms3();
+ }
+
+private:
+ void convert_nms3();
+};
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <ie_api.h>
+#include <ngraph/pass/graph_rewrite.hpp>
+#include "transformations/utils/pass_param.hpp"
+
+namespace ngraph {
+namespace pass {
+
+ class INFERENCE_ENGINE_API_CLASS(ConvertOpSet3ToOpSet2);
+
+} // namespace pass
+} // namespace ngraph
+
+class ngraph::pass::ConvertOpSet3ToOpSet2: public ngraph::pass::FunctionPass, public ngraph::pass::PassParam {
+public:
+ explicit ConvertOpSet3ToOpSet2(const PassParam::param_callback & callback = PassParam::getDefaultCallback())
+ : FunctionPass(), PassParam(callback) {}
+
+ bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+};
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifndef NGRAPH_PASS
+#warning "NGRAPH_PASS is not defined"
+#define NGRAPH_PASS(A, B)
+#endif
+
+// To register new pass you need to define NGRAPH_PASS
+// Usage example:
+// ngraph::pass:Manager pm;
+// #define NGRAPH_PASS(NAME, NAMESPACE) pm.register_pass<NAMESPACE::NAME>();
+// #include <transformations/transformations_tbl.hpp>
+// #undef NGRAPH_PASS
+
+NGRAPH_PASS(ConvertBroadcast3, ::ngraph::pass)
+NGRAPH_PASS(ConvertNMS3, ::ngraph::pass)
+NGRAPH_PASS(ConvertShapeOf3, ::ngraph::pass)
+NGRAPH_PASS(ConvertTopK3, ::ngraph::pass)
+
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+
+ class INFERENCE_ENGINE_API_CLASS(ConvertShapeOf3);
+
+} // namespace pass
+} // namespace ngraph
+
+class ngraph::pass::ConvertShapeOf3: public ngraph::pass::GraphRewrite {
+public:
+ ConvertShapeOf3() : GraphRewrite() {
+ convert_shapeof3();
+ }
+
+private:
+ void convert_shapeof3();
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+#include "transformations/utils/pass_param.hpp"
+
+namespace ngraph {
+namespace pass {
+
+ class INFERENCE_ENGINE_API_CLASS(ConvertTopK3);
+
+} // namespace pass
+} // namespace ngraph
+
+class ngraph::pass::ConvertTopK3: public ngraph::pass::GraphRewrite, public ngraph::pass::PassParam {
+public:
+ ConvertTopK3() : GraphRewrite() {
+ convert_topk3();
+ }
+
+private:
+ void convert_topk3();
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+
+class INFERENCE_ENGINE_API_CLASS(ConvertScatterElementsToScatter);
+
+} // namespace pass
+} // namespace ngraph
+
+class ngraph::pass::ConvertScatterElementsToScatter: public ngraph::pass::GraphRewrite {
+public:
+ ConvertScatterElementsToScatter() : GraphRewrite() {
+ convert_scatter_elements_to_scatter();
+ }
+
+private:
+ void convert_scatter_elements_to_scatter();
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+#include <ngraph/slice_plan.hpp>
+#include <ngraph/util.hpp>
+
+namespace ngraph {
+namespace pass {
+
+ class INFERENCE_ENGINE_API_CLASS(StridedSliceOptimization);
+ class INFERENCE_ENGINE_API_CLASS(UselessStridedSliceEraser);
+ class INFERENCE_ENGINE_API_CLASS(SharedStridedSliceEraser);
+ class INFERENCE_ENGINE_API_CLASS(GroupedStridedSliceOptimizer);
+ } // namespace pass
+} // namespace ngraph
+
+
+/*
+ * Description:
+ * UselessStridedSliceEraser transformation removes StridedSlice operations
+ * with equal input and output shapes.
+ */
+
+class ngraph::pass::UselessStridedSliceEraser: public ngraph::pass::FunctionPass {
+public:
+ bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+};
+
+/*
+ * Description:
+ * SharedStridedSliceEraser replaces group of StridedSlice operations with first
+ * StridedSlice in this group. All StridedSlices in this group must be equal and
+ * consume the same output port.
+ */
+
+class ngraph::pass::SharedStridedSliceEraser: public ngraph::pass::FunctionPass {
+public:
+ bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+};
+
+/*
+ * Description:
+ * GroupedStridedSliceOptimizer replaces group of StridedSlice operations with VariadicSplit
+ * All StridedSlice operations must slice data with the same axis and stride = 1.
+ */
+
+class ngraph::pass::GroupedStridedSliceOptimizer: public ngraph::pass::FunctionPass {
+public:
+ bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+};
+
+class ngraph::pass::StridedSliceOptimization: public ngraph::pass::FunctionPass {
+public:
+ bool run_on_function(std::shared_ptr<ngraph::Function> f) override {
+ bool rewritten = UselessStridedSliceEraser().run_on_function(f);
+ rewritten |= SharedStridedSliceEraser().run_on_function(f);
+ rewritten |= GroupedStridedSliceOptimizer().run_on_function(f);
+ return rewritten;
+ }
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+
+ class INFERENCE_ENGINE_API_CLASS(RemoveFilteringBoxesBySize);
+
+} // namespace pass
+} // namespace ngraph
+
+class ngraph::pass::RemoveFilteringBoxesBySize: public ngraph::pass::GraphRewrite {
+public:
+ RemoveFilteringBoxesBySize() : GraphRewrite() {
+ remove_filtering_boxes_by_size();
+ }
+
+private:
+ void remove_filtering_boxes_by_size();
+};
// SPDX-License-Identifier: Apache-2.0
//
-#include "transformations/common_optimizations/common_optimizations.hpp"
-
#include <memory>
+#include "transformations/common_optimizations/common_optimizations.hpp"
+#include "transformations/convert_opset1_to_legacy/convert_prior_to_ie_prior.hpp"
+#include "transformations/optimize_strided_slice.hpp"
+#include "transformations/convert_scatter_elements_to_scatter.hpp"
+#include "transformations/remove_filtering_boxes_by_size.hpp"
+#include "transformations/init_node_info.hpp"
+
#include <ngraph/pass/manager.hpp>
#include <ngraph/pass/nop_elimination.hpp>
+#include <ngraph/pass/algebraic_simplification.hpp>
+#include <ngraph/pass/constant_folding.hpp>
+
bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::Function> f) {
ngraph::pass::Manager CommonOptimizations;
#include <transformations/convert_opset1_to_legacy/convert_power_to_power_ie.hpp>
#include <transformations/convert_opset1_to_legacy/convert_prelu_to_relu_ie.hpp>
#include <transformations/convert_opset1_to_legacy/convert_proposal_to_proposal_ie.hpp>
-#include <transformations/convert_opset1_to_legacy/convert_prior_to_ie_prior.hpp>
#include <transformations/convert_reduce_to_pooling.hpp>
#include <transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.hpp>
#include <transformations/convert_subtract.hpp>
#include <transformations/pull_transpose_through_fq.hpp>
#include <transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.hpp>
#include <transformations/convert_opset1_to_legacy/convert_hard_sigmoid_to_hard_sigmoid_ie.hpp>
-#include <transformations/init_node_info.hpp>
#include <ngraph/pass/constant_folding.hpp>
#include <ngraph/pass/manager.hpp>
#include <memory>
#include <vector>
+#include <ngraph/opsets/opset3.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph_ops/prior_box_ie.hpp>
input_2 = convert2->input_value(0).get_node_shared_ptr();
}
- auto shape_of1 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_1);
- auto shape_of2 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_2);
+ // the input can be either ShapeOf-1 or ShapeOf-3
+ std::shared_ptr<ngraph::op::Op> shape_of1 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_1);
+ std::shared_ptr<ngraph::op::Op> shape_of2 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_2);
if (!shape_of1 || !shape_of2) {
+ shape_of1 = std::dynamic_pointer_cast<ngraph::opset3::ShapeOf>(input_1);
+ shape_of2 = std::dynamic_pointer_cast<ngraph::opset3::ShapeOf>(input_2);
+ }
+ if (!shape_of1 || !shape_of2) {
return false;
}
+ // keep this code for a while in case we decide to run this transformation again in the opset1->legacy
+ // the input can be either ShapeOf or Convert(ShapeOf)
+// if (!shape_of1 || !shape_of2) {
+// auto shapeof1_convert = std::dynamic_pointer_cast<ngraph::opset1::Convert> (input_1);
+// auto shapeof2_convert = std::dynamic_pointer_cast<ngraph::opset1::Convert> (input_2);
+// if (!shapeof1_convert || !shapeof2_convert)
+// return false;
+// shape_of1 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf>(shapeof1_convert->input_value(0).get_node_shared_ptr());
+// shape_of2 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf>(shapeof2_convert->input_value(0).get_node_shared_ptr());
+// if (!shape_of1 || !shape_of2)
+// return false;
+// ops_to_replace.push_back(shapeof1_convert);
+// ops_to_replace.push_back(shapeof2_convert);
+// }
ops_to_replace.push_back(shape_of1);
ops_to_replace.push_back(shape_of2);
input_2 = convert2->input_value(0).get_node_shared_ptr();
}
- auto shape_of1 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_1);
- auto shape_of2 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_2);
+ // the input can be either ShapeOf-1 or ShapeOf-3
+ std::shared_ptr<ngraph::op::Op> shape_of1 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_1);
+ std::shared_ptr<ngraph::op::Op> shape_of2 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_2);
if (!shape_of1 || !shape_of2) {
+ shape_of1 = std::dynamic_pointer_cast<ngraph::opset3::ShapeOf>(input_1);
+ shape_of2 = std::dynamic_pointer_cast<ngraph::opset3::ShapeOf>(input_2);
+ }
+ if (!shape_of1 || !shape_of2) {
return false;
}
+ // keep this code for a while in case we decide to run this transformation again in the opset1->legacy
+ // the input can be either ShapeOf or Convert(ShapeOf)
+// if (!shape_of1 || !shape_of2) {
+// auto shapeof1_convert = std::dynamic_pointer_cast<ngraph::opset1::Convert> (input_1);
+// auto shapeof2_convert = std::dynamic_pointer_cast<ngraph::opset1::Convert> (input_2);
+// if (!shapeof1_convert || !shapeof2_convert)
+// return false;
+// shape_of1 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf>(shapeof1_convert->input_value(0).get_node_shared_ptr());
+// shape_of2 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf>(shapeof2_convert->input_value(0).get_node_shared_ptr());
+// if (!shape_of1 || !shape_of2)
+// return false;
+// ops_to_replace.push_back(shapeof1_convert);
+// ops_to_replace.push_back(shapeof2_convert);
+// }
ops_to_replace.push_back(shape_of1);
ops_to_replace.push_back(shape_of2);
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/convert_opset3_to_opset2/convert_broadcast3.hpp"
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/rt_info.hpp>
+
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset3.hpp>
+
+void ngraph::pass::ConvertBroadcast3::convert_broadcast3() {
+ auto weights = std::make_shared<pattern::op::Label>(element::f32, Shape {1});
+ auto shp = std::make_shared<pattern::op::Label>(element::i64, Shape {1});
+ auto axes = std::make_shared<pattern::op::Label>(element::i64, Shape {1});
+ auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(weights, shp, axes);
+
+ auto broadcast_no_axes = std::make_shared<ngraph::opset3::Broadcast>(weights, shp);
+
+ ngraph::graph_rewrite_callback callback = [](pattern::Matcher& m) {
+ auto broadcast = std::dynamic_pointer_cast<ngraph::opset3::Broadcast>(m.get_match_root());
+ if (!broadcast) {
+ return false;
+ }
+
+ auto input = broadcast->input_value(0);
+ auto target_shape = broadcast->input_value(1);
+
+ auto last_node = input.get_node_shared_ptr();
+ auto broadcast_type = broadcast->get_broadcast_spec();
+
+ if (broadcast_type == op::BroadcastType::NUMPY) {
+ last_node = std::make_shared<ngraph::opset1::Broadcast>(input, target_shape, op::AutoBroadcastType::NUMPY);
+ ngraph::copy_runtime_info(broadcast, last_node);
+ } else if (broadcast_type == op::BroadcastType::PDPD) {
+ last_node = std::make_shared<ngraph::opset1::Broadcast>(input, target_shape, op::AutoBroadcastType::PDPD);
+ ngraph::copy_runtime_info(broadcast, last_node);
+ } else if (broadcast_type == op::BroadcastType::NONE) {
+ last_node = std::make_shared<ngraph::opset1::Broadcast>(input, target_shape, broadcast->input_value(2), op::AutoBroadcastType::NONE);
+ ngraph::copy_runtime_info(broadcast, last_node);
+ } else if (broadcast_type == op::BroadcastType::BIDIRECTIONAL) {
+ auto constant_one = std::make_shared<ngraph::opset1::Constant>(input.get_element_type(), Shape({1}), std::vector<int>{1});
+ auto broadcast_ones = std::make_shared<ngraph::opset1::Broadcast>(constant_one, target_shape, op::AutoBroadcastType::NUMPY);
+ last_node = std::make_shared<ngraph::opset1::Multiply>(input, broadcast_ones);
+ ngraph::copy_runtime_info(broadcast, {last_node, broadcast_ones, constant_one});
+ }
+
+ last_node->set_friendly_name(broadcast->get_friendly_name());
+
+ ngraph::replace_node(m.get_match_root(), last_node);
+ return true;
+ };
+
+ auto m = std::make_shared<ngraph::pattern::Matcher>(broadcast, "ConvertBroadcast3");
+ auto m_no_axes = std::make_shared<ngraph::pattern::Matcher>(broadcast_no_axes, "ConvertBroadcast3NoAxes");
+ this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+ this->add_matcher(m_no_axes, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/convert_opset3_to_opset2/convert_nms3.hpp"
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset2.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/rt_info.hpp>
+
+void ngraph::pass::ConvertNMS3::convert_nms3() {
+ auto boxes = std::make_shared<pattern::op::Label>(element::f32, Shape{1, 1000, 4});
+ auto scores = std::make_shared<pattern::op::Label>(element::f32, Shape{1, 1, 1000});
+ auto max_output_boxes_per_class = ngraph::opset3::Constant::create(element::i64, Shape{}, {10});
+ auto iou_threshold = ngraph::opset3::Constant::create(element::f32, Shape{}, {0.75});
+ auto score_threshold = ngraph::opset3::Constant::create(element::f32, Shape{}, {0.7});
+ auto nms = std::make_shared<ngraph::opset3::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
+ iou_threshold, score_threshold);
+
+ ngraph::graph_rewrite_callback callback = [](pattern::Matcher &m) {
+ auto nms = std::dynamic_pointer_cast<ngraph::opset3::NonMaxSuppression>(m.get_match_root());
+ if (!nms) {
+ return false;
+ }
+
+ Output<Node> last;
+ ngraph::NodeVector new_ops;
+
+ auto new_nms = std::make_shared<ngraph::opset2::NonMaxSuppression>(nms->input_value(0), nms->input_value(1),
+ nms->input_value(2), nms->input_value(3), nms->input_value(4),
+ static_cast<const op::v1::NonMaxSuppression::BoxEncodingType>(nms->get_box_encoding()),
+ nms->get_sort_result_descending());
+
+ new_ops.push_back(new_nms);
+ // if the output type is i32 then it matches the behavior of v1::NonMaxSuppression; otherwise a Convert must be inserted
+ if (nms->get_output_type() == element::i32) {
+ last = new_nms;
+ } else {
+ last = std::make_shared<ngraph::opset2::Convert>(new_nms, nms->get_output_type());
+ new_ops.push_back(last.get_node_shared_ptr());
+ }
+
+ last.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name());
+ ngraph::copy_runtime_info(nms, new_ops);
+ ngraph::replace_node(nms, last.get_node_shared_ptr());
+ return true;
+ };
+
+ auto m = std::make_shared<ngraph::pattern::Matcher>(nms, "ConvertNMS3");
+ this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp"
+
+#include "transformations/convert_opset3_to_opset2/convert_broadcast3.hpp"
+#include "transformations/convert_opset3_to_opset2/convert_nms3.hpp"
+#include "transformations/convert_opset3_to_opset2/convert_shapeof3.hpp"
+#include "transformations/convert_opset3_to_opset2/convert_topk3.hpp"
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/pass/manager.hpp>
+
+bool ngraph::pass::ConvertOpSet3ToOpSet2::run_on_function(std::shared_ptr<ngraph::Function> f) {
+ ngraph::pass::Manager OpSet3ToOpSet2;
+ std::vector<std::shared_ptr<ngraph::pass::PassBase> > transforms;
+
+#define NGRAPH_PASS(NAME, NAMESPACE) transforms.push_back(OpSet3ToOpSet2.register_pass<NAMESPACE::NAME>());
+#include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2_tbl.hpp>
+#undef NGRAPH_PASS
+
+ for (auto & t : transforms) {
+ if (auto t_param = std::dynamic_pointer_cast<PassParam>(t)) {
+ t_param->setCallback(transformation_callback);
+ }
+ }
+ OpSet3ToOpSet2.run_passes(f);
+ return true;
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/convert_opset3_to_opset2/convert_shapeof3.hpp"
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/rt_info.hpp>
+
+void ngraph::pass::ConvertShapeOf3::convert_shapeof3() {
+ auto input = std::make_shared<pattern::op::Label>(element::i64, Shape{1, 1, 1, 1});
+ auto shapeof = std::make_shared<ngraph::opset3::ShapeOf>(input);
+
+ ngraph::graph_rewrite_callback callback = [](pattern::Matcher& m) {
+ auto shapeof = std::dynamic_pointer_cast<ngraph::opset3::ShapeOf> (m.get_match_root());
+ if (!shapeof) {
+ return false;
+ }
+
+ Output<Node> last;
+ ngraph::NodeVector new_ops;
+
+ auto new_shapeof = std::make_shared<ngraph::opset1::ShapeOf>(shapeof->input_value(0));
+ new_ops.push_back(new_shapeof);
+ // if the output type is i64 then it matches the behavior of v1::ShapeOf; otherwise a Convert must be inserted
+ if (shapeof->get_output_type() == element::i64) {
+ last = new_shapeof;
+ } else {
+ last = std::make_shared<ngraph::opset1::Convert>(new_shapeof, shapeof->get_output_type());
+ new_ops.push_back(last.get_node_shared_ptr());
+ }
+
+ last.get_node_shared_ptr()->set_friendly_name(shapeof->get_friendly_name());
+ ngraph::copy_runtime_info(shapeof, new_ops);
+ ngraph::replace_node(shapeof, last.get_node_shared_ptr());
+ return true;
+ };
+
+ auto m = std::make_shared<ngraph::pattern::Matcher>(shapeof, "ConvertShapeOf3");
+ this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+}
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/convert_opset3_to_opset2/convert_topk3.hpp"
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset2.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/rt_info.hpp>
+
+void ngraph::pass::ConvertTopK3::convert_topk3() {
+ auto input = std::make_shared<pattern::op::Label>(element::i64, Shape{1, 1, 1, 1});
+ auto k = ngraph::opset3::Constant::create(element::i64, Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset3::TopK>(input, k, 0, "min", "value", element::i64);
+ // this is a temporary workaround for a bug: TopK-3 does not have clone_with_new_inputs, so cloning a TopK-3
+ // generates a TopK-1 operation
+ auto topk_v1 = std::make_shared<ngraph::opset1::TopK>(input, k, 0, "min", "value", element::i64);
+
+ ngraph::graph_rewrite_callback callback = [](pattern::Matcher& m) {
+ std::shared_ptr<ngraph::op::v1::TopK> topk = std::dynamic_pointer_cast<ngraph::opset3::TopK> (m.get_match_root());
+ if (!topk) {
+ topk = std::dynamic_pointer_cast<ngraph::opset1::TopK> (m.get_match_root());
+ }
+ if (!topk) {
+ return false;
+ }
+ Output<Node> last;
+ ngraph::NodeVector new_ops;
+
+ auto new_topk = std::make_shared<ngraph::opset2::TopK>(topk->input_value(0), topk->input_value(1),
+ topk->get_axis(), topk->get_mode(), topk->get_sort_type(), element::i32);
+ new_ops.push_back(new_topk);
+ // if the output type is i32 then it matches the behavior of v1::TopK; otherwise a Convert must be inserted
+ if (topk->get_index_element_type() == element::i32) {
+ last = new_topk->output(1);
+ } else {
+ last = std::make_shared<ngraph::opset2::Convert>(new_topk->output(1), topk->get_index_element_type());
+ new_ops.push_back(last.get_node_shared_ptr());
+ }
+
+ new_topk->set_friendly_name(topk->get_friendly_name());
+ ngraph::copy_runtime_info(topk, new_ops);
+ topk->output(0).replace(new_topk->output(0));
+ topk->output(1).replace(last);
+ return true;
+ };
+
+ auto m = std::make_shared<ngraph::pattern::Matcher>(topk, "ConvertTopK3");
+ this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+ auto m2 = std::make_shared<ngraph::pattern::Matcher>(topk_v1, "ConvertTopK3");
+ this->add_matcher(m2, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/convert_scatter_elements_to_scatter.hpp"
+
+#include <memory>
+#include <vector>
+#include <numeric>
+
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/rt_info.hpp>
+#include <ngraph/validation_util.hpp>
+
+void ngraph::pass::ConvertScatterElementsToScatter::convert_scatter_elements_to_scatter() {
+ auto data = std::make_shared<pattern::op::Label>(element::f32, Shape{1});
+ auto indices = std::make_shared<pattern::op::Label>(element::i64, Shape{1});
+ auto updates = std::make_shared<pattern::op::Label>(element::f32, Shape{1});
+ auto axis = ngraph::opset3::Constant::create(element::i64, {1}, {0});
+
+ auto broadcast_shape = std::make_shared<pattern::op::Label>(element::i64, Shape{1});
+ auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(indices, broadcast_shape);
+
+ auto scatter = std::make_shared<ngraph::opset3::ScatterElementsUpdate>(data, broadcast, updates, axis);
+
+ ngraph::graph_rewrite_callback callback = [](pattern::Matcher& m) {
+ auto scatter = m.get_match_root();
+ auto broadcast = scatter->input_value(1).get_node_shared_ptr();
+ auto axis_const = std::dynamic_pointer_cast<ngraph::opset3::Constant>(scatter->input_value(3).get_node_shared_ptr());
+
+ if (!axis_const) {
+ return false;
+ }
+
+ auto indices_input = broadcast->input_value(0);
+
+ const auto data_pshape = scatter->input(0).get_partial_shape();
+ const auto indices_pshape = indices_input.get_partial_shape();
+ const auto updates_pshape = scatter->input(2).get_partial_shape();
+
+ // Check that ScatterElementsUpdate and Broadcast inputs has static shapes
+ if (data_pshape.rank().is_dynamic() || indices_pshape.rank().is_dynamic() || updates_pshape.rank().is_dynamic()) {
+ return false;
+ }
+
+ const uint64_t data_rank = data_pshape.rank().get_length();
+ const uint64_t updates_rank = updates_pshape.rank().get_length();
+ const uint64_t indices_rank = indices_pshape.rank().get_length();
+
+
+ // Check that axis Constant has {} or {1} shape
+ if (shape_size(axis_const->get_shape()) > 1) {
+ return false;
+ }
+
+ const size_t axis = ngraph::normalize_axes(scatter->get_friendly_name(),
+ axis_const->cast_vector<int64_t>(),
+ data_pshape.rank())[0];
+
+ struct Range {
+ uint64_t l, r;
+ Range(const uint64_t & l, const uint64_t & r) : l(l), r(r) {
+ if (l > r) throw ngraph_error("Range values are inconsistent");
+ }
+
+ uint64_t size() const {
+ return r - l;
+ }
+
+ bool operator!= (const Range & rhs) const {
+ return (r - l != rhs.r - rhs.l);
+ }
+
+ static
+ bool is_valid(const int64_t & l, const int64_t & r) {
+ return (l >= 0 && l <= r);
+ }
+
+ static
+ bool is_empty(const uint64_t & l, const uint64_t & r) {
+ return l == r;
+ }
+ };
+
+ auto compare_shapes_ranges = [](const PartialShape & lhsShape, const PartialShape & rhsShape, const Range & lhsRange, const Range & rhsRange) -> bool {
+ // Check that ranges are equal and suits to Shapes sizes
+ if (lhsRange != rhsRange ||
+ lhsRange.r > lhsShape.rank().get_length() ||
+ rhsRange.r > rhsShape.rank().get_length()) {
+ return false;
+ }
+
+ // Check that Shape values in ranges are equal
+ for (size_t lhsIndex = lhsRange.l, rhsIndex = rhsRange.l; lhsIndex < lhsRange.r; ++lhsIndex, ++rhsIndex) {
+ if (lhsShape[lhsIndex].is_dynamic() || rhsShape[rhsIndex].is_dynamic() ||
+ lhsShape[lhsIndex] != rhsShape[rhsIndex]) {
+ return false;
+ }
+ }
+
+ return true;
+ };
+
+ auto product = [](const Shape & shape, const Range & range) -> uint64_t {
+ uint64_t prod(1);
+ for (size_t dim = range.l; dim < range.r; ++dim) {
+ prod *= shape[dim];
+ }
+ return prod;
+ };
+
+ /* To transform ScatterElementsUpdate to ScatterUpdate input shapes must match this rules:
+ *
+ * data_shape[d_0, d_1, ... , d_n]
+ *
+ * indices_shape[i_0, i_1, ... , i_n]
+ *
+ * updates_shape[d_0, d_1, i_0(axis), i_1, ... , i_n, d_axis + 1, ... , d_n]
+ *
+ * EXAMPLE:
+ * In this example the input shapes suit the rules above and ScatterElementsUpdate can be replaced with ScatterUpdate
+ *
+ * axis = 1 | (axis)
+ * \/
+ *
+ * data_shape [1000, 256, 10, 15]
+ *
+ * index_shape [ 125, 2 ]
+ *
+ * updates_shape [1000, 125, 2, 10, 15]
+ *
+ */
+
+ // data_shape and updates_shape dims must be equal up to axis dimension
+ if (!compare_shapes_ranges(data_pshape, updates_pshape, {0, axis}, {0, axis})) {
+ return false;
+ }
+
+ // data_shape dims starting right after axis dim must match last updates_shape dimensions
+ if (!Range::is_valid(updates_rank - (data_rank - (axis + 1)), updates_rank)) {
+ return false;
+ }
+
+ const Range updates_last{updates_rank - (data_rank - (axis + 1)), updates_rank};
+ if (!compare_shapes_ranges(data_pshape, updates_pshape, {axis + 1, data_rank}, updates_last)) {
+ return false;
+ }
+
+ // indices_shape dims product must match updates_shape dims starting from axis dimension
+ if (!Range::is_valid(axis, updates_last.l) && !Range::is_empty(axis, updates_last.l)) {
+ return false;
+ }
+
+ NodeVector new_ops;
+
+ // In case of static shapes we check that indices dims product match with updates dims
+ if (updates_pshape.is_static() && indices_pshape.is_static()) {
+ const auto updated_range_prod = product(updates_pshape.get_shape(), {axis, updates_last.l});
+ const auto indices_range_prod = product(indices_pshape.get_shape(), {0, indices_rank});
+
+ if (updated_range_prod != indices_range_prod) {
+ return false;
+ }
+
+ // if indices_shape do not match updates_shape dims{axis, updates_last.l}
+ // we reshape indices to updates_shape
+ const auto updates_shape = updates_pshape.get_shape();
+ const auto indices_shape = indices_pshape.get_shape();
+ Shape indices_new_shape(updates_shape.begin() + axis, updates_shape.begin() + updates_last.l);
+ if (indices_shape != indices_new_shape) {
+ indices_input = std::make_shared<ngraph::opset3::Reshape>(indices_input,
+ opset3::Constant::create(element::i64, Shape{indices_new_shape.size()}, indices_new_shape), false);
+ new_ops.push_back(indices_input.get_node_shared_ptr());
+ }
+ } else {
+ // Tight constrain for dynamic case:
+ // 1. indices_pshape 1...N dimensions must be equal to 1
+ // 2. updates_pshape axis interval size = 1
+
+ for (size_t dim = 1; dim < indices_rank; ++dim) {
+ if (indices_pshape[dim] != 1) return false;
+ }
+
+ if (Range(axis, updates_last.l).size() != 1) {
+ return false;
+ }
+
+ // Squeeze 1 dims for indices input
+ if (indices_rank > 1) {
+ std::vector<int64_t> squeeze_axes(indices_rank - 1ul);
+ std::iota(squeeze_axes.begin(), squeeze_axes.end(), 1);
+ indices_input = std::make_shared<ngraph::opset3::Squeeze>(indices_input,
+ opset3::Constant::create(element::i64, Shape{squeeze_axes.size()}, squeeze_axes));
+ new_ops.push_back(indices_input.get_node_shared_ptr());
+ }
+ }
+
+ auto scatter_update = std::make_shared<ngraph::opset3::ScatterUpdate>(scatter->input_value(0),
+ indices_input,
+ scatter->input_value(2),
+ scatter->input_value(3));
+ new_ops.push_back(scatter_update);
+ scatter_update->set_friendly_name(scatter->get_friendly_name());
+ ngraph::copy_runtime_info({scatter, broadcast}, {new_ops});
+ ngraph::replace_node(scatter, scatter_update);
+ return true;
+ };
+
+ auto m = std::make_shared<ngraph::pattern::Matcher>(scatter, "ConvertScatterElementsToScatter");
+ this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+}
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <memory>
+#include <vector>
+
+#include <transformations/optimize_strided_slice.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/rt_info.hpp>
+
+bool ngraph::pass::UselessStridedSliceEraser::run_on_function(std::shared_ptr<ngraph::Function> f) {
+ bool rewritten = false;
+ for (auto & node : f->get_ordered_ops()) {
+ auto ss = std::dynamic_pointer_cast<ngraph::opset1::StridedSlice>(node);
+ if (!ss || ss->get_output_partial_shape(0).is_dynamic() || ss->get_input_partial_shape(0).is_dynamic())
+ continue;
+ if (ss->input(0).get_shape() != ss->output(0).get_shape())
+ continue;
+ rewritten |= replace_output_update_name(ss->output(0), ss->input_value(0));
+ }
+ return rewritten;
+}
+
+// Builds a normalized SlicePlan (begins/ends/strides plus axis sets) for the
+// given StridedSlice. Returns a default-constructed (empty) SlicePlan when the
+// plan cannot be computed, i.e. when begin/end/strides are not Constants or the
+// input shape is dynamic. Callers compare against SlicePlan() to detect this.
+ngraph::SlicePlan get_slice_plan(std::shared_ptr<ngraph::opset1::StridedSlice> slice) {
+    // StridedSlice masks are 0/1 vectors; collect the set positions into an AxisSet.
+    auto convert_mask_to_axis_set = [](const std::vector<int64_t>& mask) {
+        ngraph::AxisSet axis_set{};
+        for (size_t i = 0; i < static_cast<size_t>(mask.size()); ++i) {
+            if (mask[i] == 1)
+                axis_set.emplace(i);
+        }
+        return axis_set;
+    };
+
+    // NOTE(review): 'data' is never used below — candidate for removal.
+    auto data = slice->input_value(0).get_node_shared_ptr();
+    auto begin = std::dynamic_pointer_cast<ngraph::opset1::Constant>(slice->input_value(1).get_node_shared_ptr());
+    auto end = std::dynamic_pointer_cast<ngraph::opset1::Constant>(slice->input_value(2).get_node_shared_ptr());
+    auto strides = std::dynamic_pointer_cast<ngraph::opset1::Constant>(slice->input_value(3).get_node_shared_ptr());
+    // The plan is only computable for constant slice parameters and a static input shape.
+    if (!begin || !end || !strides || slice->input(0).get_partial_shape().is_dynamic())
+        return ngraph::SlicePlan();
+
+    auto begin_vec = begin->cast_vector<int64_t>();
+    auto end_vec = end->cast_vector<int64_t>();
+    auto strides_vec = strides->cast_vector<int64_t>();
+    const auto begin_mask = convert_mask_to_axis_set(slice->get_begin_mask());
+    const auto end_mask = convert_mask_to_axis_set(slice->get_end_mask());
+
+    ngraph::SlicePlan plan = ngraph::make_slice_plan(slice->input(0).get_shape(),
+                                                     begin_vec,
+                                                     end_vec,
+                                                     strides_vec,
+                                                     begin_mask,
+                                                     end_mask,
+                                                     convert_mask_to_axis_set(slice->get_new_axis_mask()),
+                                                     convert_mask_to_axis_set(slice->get_shrink_axis_mask()),
+                                                     convert_mask_to_axis_set(slice->get_ellipsis_mask()));
+    return plan;
+}
+
+
+// Returns true iff both StridedSlice nodes resolve to the same computable
+// SlicePlan. A slice whose plan could not be computed (empty plan) is never
+// considered equal to anything.
+bool strided_slices_perform_the_same(std::shared_ptr<ngraph::opset1::StridedSlice> lhs,
+                                     std::shared_ptr<ngraph::opset1::StridedSlice> rhs) {
+    const auto empty_plan = ngraph::SlicePlan();
+    const auto lhs_plan = get_slice_plan(lhs);
+    if (lhs_plan == empty_plan)
+        return false;
+    const auto rhs_plan = get_slice_plan(rhs);
+    if (rhs_plan == empty_plan)
+        return false;
+    return lhs_plan == rhs_plan;
+}
+
+// Deduplicates StridedSlice ops: among slices consuming the same source output
+// and performing an identical slice (equal SlicePlans), all but the first in
+// topological order are replaced by that first ("root") slice.
+bool ngraph::pass::SharedStridedSliceEraser::run_on_function(std::shared_ptr<ngraph::Function> f) {
+    bool graph_rewritten = false;
+
+    // Bucket every StridedSlice by the producer output it reads.
+    std::map<ngraph::Output<Node>, std::vector<std::shared_ptr<ngraph::opset1::StridedSlice>>> source_to_ss;
+    for (const auto & node : f->get_ordered_ops()) {
+        if (auto ss = std::dynamic_pointer_cast<ngraph::opset1::StridedSlice>(node)) {
+            source_to_ss[ss->input_value(0)].push_back(ss);
+        }
+    }
+
+    for (auto& pair : source_to_ss) {
+        if (pair.second.size() < 2)
+            continue;
+        // First slice in ordered-ops order becomes the canonical one.
+        auto root_ss = pair.second[0];
+        for (auto& child_ss : pair.second) {
+            // Skip the root itself (identified by instance id), then merge equal slices.
+            if (root_ss->get_instance_id() != child_ss->get_instance_id() && strided_slices_perform_the_same(root_ss, child_ss)) {
+                graph_rewritten |= replace_output_update_name(child_ss->output(0), root_ss->output(0));
+            }
+        }
+    }
+    return graph_rewritten;
+}
+
+// Merges groups of StridedSlice ops that partition the same source tensor along
+// a single common axis into one VariadicSplit. A group is replaced only when:
+//  * every slice has a computable SlicePlan of the same rank,
+//  * no slice uses ellipsis/new-axis/shrink-axis masks,
+//  * all slices cut along exactly one axis, with stride 1 and no overlap.
+// Gaps between the partitions (and a possible tail) become throw-away
+// VariadicSplit outputs kept alive via Result nodes.
+bool ngraph::pass::GroupedStridedSliceOptimizer::run_on_function(std::shared_ptr<ngraph::Function> f) {
+    bool graph_rewritten = false;
+    using planned_slice = std::pair<std::shared_ptr<ngraph::opset1::StridedSlice>, ngraph::SlicePlan>;
+
+    // Group slices by their source output; only slices with a computable plan participate.
+    std::map<ngraph::Output<Node>, std::vector<planned_slice>> source_to_ss_with_plan;
+    for (const auto & node : f->get_ordered_ops()) {
+        if (auto ss = std::dynamic_pointer_cast<ngraph::opset1::StridedSlice>(node)) {
+            auto slice_plan = get_slice_plan(ss);
+            if (slice_plan == ngraph::SlicePlan())
+                continue;
+            source_to_ss_with_plan[ss->input_value(0)].push_back({ss, slice_plan});
+        }
+    }
+
+    for (auto& pair : source_to_ss_with_plan) {
+        if (pair.second.size() < 2)
+            continue;
+
+        bool valid_for_replacement = true;
+
+        // All plans must have the same rank and no axis-changing masks.
+        auto root_plan = pair.second[0].second;
+        for (const auto & ss_plan : pair.second) {
+            valid_for_replacement &= (ss_plan.second.begins.size() == root_plan.begins.size());
+            valid_for_replacement &= (ss_plan.first->get_ellipsis_mask().empty() &&
+                                      ss_plan.first->get_new_axis_mask().empty() &&
+                                      ss_plan.first->get_shrink_axis_mask().empty());
+        }
+
+        if (!valid_for_replacement) continue;
+
+        auto input_shape = pair.first.get_shape();
+        // The single axis along which every slice in the group cuts; -1 = not found yet.
+        // (int64_t avoids the signed/unsigned comparison with the loop index.)
+        int64_t axis = -1;
+
+        struct OutputToPartition {
+            Output<Node> output;
+            int64_t begin;
+            int64_t end;
+        };
+
+        std::vector<OutputToPartition> output_to_partition;
+        for (size_t i = 0; i < input_shape.size(); ++i) {
+            for (const auto & ss_plan : pair.second) {
+                // A dimension participates when the slice does not span it fully.
+                if (ss_plan.second.begins[i] != 0 || ss_plan.second.ends[i] != static_cast<int64_t>(input_shape[i])) {
+                    if (axis == -1 || axis == static_cast<int64_t>(i))
+                        axis = static_cast<int64_t>(i);
+                    else
+                        valid_for_replacement = false;  // slices cut along different axes
+                    if (ss_plan.second.strides[i] != 1)
+                        valid_for_replacement = false;  // VariadicSplit cannot express strides
+                    output_to_partition.push_back({ss_plan.first->output(0), ss_plan.second.begins[i], ss_plan.second.ends[i]});
+                }
+                if (!valid_for_replacement) break;
+            }
+            if (!valid_for_replacement) break;
+        }
+
+        if (!valid_for_replacement) continue;
+        if (output_to_partition.size() < 2) continue;
+
+        std::sort(output_to_partition.begin(), output_to_partition.end(),
+                  [](const OutputToPartition& lhs, const OutputToPartition& rhs)
+                  {return lhs.begin < rhs.begin;});
+
+        // Verify the partitions do not overlap and stay inside the dimension.
+        // (int64_t, matching 'begin'/'end', avoids a signed/unsigned comparison.)
+        std::vector<std::pair<Output<Node>, uint64_t>> output_to_size;
+        int64_t prev_r = 0;
+        for (auto & record : output_to_partition) {
+            valid_for_replacement &= (record.begin >= prev_r);
+            prev_r = record.end;
+        }
+        valid_for_replacement &= (prev_r <= static_cast<int64_t>(input_shape[axis]));
+        if (!valid_for_replacement) continue;
+
+        // Build the split-size list; gaps map to a "fake" (unconnected) output.
+        prev_r = 0;
+        Output<Node> fake_output;
+        for (auto & record : output_to_partition) {
+            if (record.begin > prev_r)
+                output_to_size.emplace_back(fake_output, record.begin - prev_r);
+            prev_r = record.end;
+            output_to_size.emplace_back(record.output, record.end - record.begin);
+        }
+        if (prev_r < static_cast<int64_t>(input_shape[axis])) {
+            output_to_size.emplace_back(fake_output, input_shape[axis] - prev_r);
+        }
+
+        auto axis_const = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{}, {axis});
+
+        std::vector<int64_t> size_splits;
+        for (const auto & item : output_to_size)
+            size_splits.push_back(item.second);
+        auto size_splits_const = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{size_splits.size()}, size_splits);
+        auto variadic_split = std::make_shared<ngraph::opset1::VariadicSplit>(pair.first, axis_const, size_splits_const);
+
+        auto i = 0;
+        NodeVector ops_to_replace;
+        for (auto & record : output_to_size) {
+            if (record.first == fake_output) {
+                // Attach a Result so the unused split output is not dangling.
+                std::make_shared<ngraph::opset1::Result>(variadic_split->output(i));
+            } else {
+                record.first.replace(variadic_split->output(i));
+                ops_to_replace.push_back(record.first.get_node_shared_ptr());
+            }
+            ++i;
+        }
+        copy_runtime_info(ops_to_replace, variadic_split);
+        // Fix: previously the function always returned false even after rewriting,
+        // so the pass manager could not tell that the graph changed.
+        graph_rewritten = true;
+    }
+    return graph_rewritten;
+}
+
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/rt_info.hpp>
+
+#include "transformations/remove_filtering_boxes_by_size.hpp"
+
+// Registers a matcher for the box-filtering-by-size subgraph
+// (VariadicSplit -> Sub/Add -> Concat -> VariadicSplit -> Squeeze -> Less ->
+//  Not -> Convert chain -> And -> Convert chain -> NonZero -> Transpose ->
+//  Squeeze -> Convert) and replaces the whole match with a Range over all box
+// indices [0, num_boxes), i.e. removes the filtering entirely.
+void ngraph::pass::RemoveFilteringBoxesBySize::remove_filtering_boxes_by_size() {
+    // variadic split of the [N, 4] box tensor into its 4 coordinate columns
+    // NOTE(review): the Label shape {1000, 4} presumably acts only as a matching
+    // placeholder — confirm matching is not restricted to exactly 1000 boxes.
+    auto data = std::make_shared<pattern::op::Label>(element::f32, Shape{1000, 4});
+    auto sizes = opset3::Constant::create(element::i64, Shape{4}, std::vector<int64_t >({1, 1, 1, 1}));
+    auto axis = opset3::Constant::create(element::i64, Shape{1}, std::vector<int64_t >({1}));
+    auto split = std::make_shared<ngraph::opset3::VariadicSplit>(data, axis, sizes);
+
+    // sub -> add: width/height reconstruction from corner coordinates
+    auto sub_2_0 = std::make_shared<ngraph::opset3::Subtract>(split->output(2), split->output(0));
+    auto term_1 = std::make_shared<pattern::op::Label>(element::f32, Shape{1});
+    auto add_1 = std::make_shared<ngraph::opset3::Add>(sub_2_0, term_1);
+
+    auto sub_3_1 = std::make_shared<ngraph::opset3::Subtract>(split->output(3), split->output(1));
+    auto term_2 = std::make_shared<pattern::op::Label>(element::f32, Shape{1});
+    auto add_2 = std::make_shared<ngraph::opset3::Add>(sub_3_1, term_2);
+
+    // concat
+    auto concat = std::make_shared<ngraph::opset3::Concat>(ngraph::OutputVector({split->output(0), split->output(1), add_1->output(0), add_2->output(0)}), 1);
+
+    // second variadic split
+    auto sizes_1 = opset3::Constant::create(element::i64, Shape{4}, std::vector<int64_t >({1, 1, 1, 1}));
+    auto axis_1 = opset3::Constant::create(element::i64, Shape{1}, std::vector<int64_t >({1}));
+    auto split_1 = std::make_shared<ngraph::opset3::VariadicSplit>(concat, axis_1, sizes_1);
+
+    // squeeze
+    auto squeeze_1_axis = opset3::Constant::create(element::i64, Shape{1}, std::vector<int64_t >({1}));
+    auto squeeze_1 = std::make_shared<ngraph::opset3::Squeeze>(split_1->output(2), squeeze_1_axis);
+
+    auto squeeze_2_axis = opset3::Constant::create(element::i64, Shape{1}, std::vector<int64_t >({1}));
+    auto squeeze_2 = std::make_shared<ngraph::opset3::Squeeze>(split_1->output(3), squeeze_2_axis);
+
+    // less: compare width/height against 0
+    auto less_1_constant = opset3::Constant::create(element::f32, Shape{1}, std::vector<float >({0}));
+    auto less_1 = std::make_shared<ngraph::opset3::Less>(squeeze_1, less_1_constant);
+
+    auto less_2_constant = opset3::Constant::create(element::f32, Shape{1}, std::vector<float >({0}));
+    auto less_2 = std::make_shared<ngraph::opset3::Less>(squeeze_2, less_2_constant);
+
+    // Logical Not
+    auto not_1 = std::make_shared<ngraph::opset3::LogicalNot>(less_1);
+    auto not_2 = std::make_shared<ngraph::opset3::LogicalNot>(less_2);
+
+    // cast (bool expressed via u8 round-trip in the source model)
+    auto cast_11 = std::make_shared<ngraph::opset3::Convert>(not_1, ngraph::element::u8);
+    auto cast_12 = std::make_shared<ngraph::opset3::Convert>(cast_11, ngraph::element::boolean);
+
+    auto cast_21 = std::make_shared<ngraph::opset3::Convert>(not_2, ngraph::element::u8);
+    auto cast_22 = std::make_shared<ngraph::opset3::Convert>(cast_21, ngraph::element::boolean);
+
+    // logical and
+    auto and_1 = std::make_shared<ngraph::opset3::LogicalAnd>(cast_12, cast_22);
+
+    // cast
+    auto cast_31 = std::make_shared<ngraph::opset3::Convert>(and_1, ngraph::element::u8);
+    auto cast_32 = std::make_shared<ngraph::opset3::Convert>(cast_31, ngraph::element::f32);
+
+    // nonzero: indices of boxes that passed the filter
+    auto non_zero = std::make_shared<ngraph::opset3::NonZero>(cast_32);
+
+    auto order = opset3::Constant::create(element::i64, Shape{2}, std::vector<int64_t >({1, 0}));
+    auto transpose = std::make_shared<ngraph::opset3::Transpose>(non_zero, order);
+
+    auto squeeze_3_axis = opset3::Constant::create(element::i64, Shape{1}, std::vector<int64_t >({1}));
+    auto squeeze_3 = std::make_shared<ngraph::opset3::Squeeze>(transpose, squeeze_3_axis);
+
+    auto cast = std::make_shared<ngraph::opset3::Convert>(squeeze_3, ngraph::element::i64);
+
+    // Replacement: Range(0, num_boxes, 1), where num_boxes is dim 0 of the
+    // matched input — every box index is kept, the filter is dropped.
+    ngraph::graph_rewrite_callback callback = [data](pattern::Matcher& m) {
+        auto start = opset3::Constant::create(element::i64, Shape{}, std::vector<int64_t >({0}));
+        auto step = opset3::Constant::create(element::i64, Shape{}, std::vector<int64_t >({1}));
+
+        auto pattern_map = m.get_pattern_map();
+
+        auto input = pattern_map[data];
+        auto output = m.get_match_root();
+
+        auto input_shape = std::make_shared<ngraph::opset3::ShapeOf>(input);
+
+        // stop = input_shape[0] (the number of boxes)
+        auto axis = opset3::Constant::create(element::i64, Shape{}, std::vector<int64_t >({0}));
+        auto index = opset3::Constant::create(element::i64, Shape{}, std::vector<int64_t >({0}));
+        auto stop = std::make_shared<ngraph::opset3::Gather>(input_shape, index, axis);
+
+        auto range = std::make_shared<ngraph::opset3::Range>(start, stop, step);
+
+        range->set_friendly_name(output->get_friendly_name());
+        // TODO: add copy_runtime_info
+        ngraph::replace_node(output, range);
+
+        return true;
+    };
+
+    auto m = std::make_shared<ngraph::pattern::Matcher>(cast, "RemoveFilteringBoxesBySize");
+    this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+}
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/node.hpp>
+#include <ngraph/op/op.hpp>
+
+namespace ngraph { namespace vpu { namespace op {
+
+// VPU custom operation: computes the output shape a Reshape would produce,
+// given the input-data shape tensor and the output-shape descriptor tensor
+// (both rank-1 integral tensors; validated in validate_and_infer_types).
+// NOTE(review): 'specialZero' presumably mirrors Reshape's special_zero flag
+// (a zero dim copies the corresponding input dim) — confirm against the kernel.
+class OutShapeOfReshape : public ngraph::op::Op {
+public:
+    static constexpr NodeTypeInfo type_info{"OutShapeOfReshape", 1};
+    const NodeTypeInfo& get_type_info() const override { return type_info; }
+
+    OutShapeOfReshape(
+            const Output<Node>& inDataShape,
+            const Output<Node>& outShapeDescriptor,
+            bool specialZero);
+
+    void validate_and_infer_types() override;
+
+    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
+
+    bool visit_attributes(ngraph::AttributeVisitor& visitor) override;
+
+    // Accessors for the 'special_zero' attribute (serialized in visit_attributes).
+    bool getSpecialZero() const { return m_specialZero; }
+    void setSpecialZero(bool special_zero) { m_specialZero = special_zero; }
+
+private:
+    bool m_specialZero;
+};
+
+} // namespace op
+} // namespace vpu
+} // namespace ngraph
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/node.hpp"
+#include "ngraph/op/op.hpp"
+#include "ngraph/op/util/broadcast_base.hpp"
+#include "ngraph/op/util/attr_types.hpp"
+
+#include <memory>
+#include <vector>
+
+namespace ngraph { namespace vpu { namespace op {
+
+// VPU custom operation: a Broadcast (EXPLICIT or NUMPY mode) whose output shape
+// is evaluated statically at validation time, so the node never exposes a
+// dynamic output shape (see validate_and_infer_types in the .cpp).
+class StaticShapeBroadcast : public ::ngraph::op::util::BroadcastBase {
+public:
+    static constexpr NodeTypeInfo type_info{"StaticShapeBroadcast", 0};
+
+    const NodeTypeInfo& get_type_info() const override { return type_info; }
+
+    // Explicit-mode constructor: arg, target shape and axes mapping inputs.
+    StaticShapeBroadcast(const Output<Node>& arg,
+                         const Output<Node>& targetShape,
+                         const Output<Node>& axesMapping,
+                         const ngraph::op::BroadcastModeSpec& broadcastSpec = ngraph::op::BroadcastType::EXPLICIT);
+
+    // Numpy-mode constructor: arg and target shape inputs only.
+    StaticShapeBroadcast(const Output<Node>& arg,
+                         const Output<Node>& targetShape,
+                         const ngraph::op::BroadcastModeSpec& broadcastSpec = ngraph::op::BroadcastType::NUMPY);
+
+    void validate_and_infer_types() override;
+
+    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& newInputs) const override;
+
+    bool visit_attributes(ngraph::AttributeVisitor& visitor) override;
+
+    // Statically evaluated output shape; kept unchanged when later re-validation
+    // can no longer evaluate the target shape (see validate_and_infer_types).
+    PartialShape getEvaluatedShape() const { return m_evaluatedOutputShape; }
+    void setEvaluatedShape(const PartialShape& shape) { m_evaluatedOutputShape = shape; }
+
+private:
+    PartialShape m_evaluatedOutputShape;
+};
+
+} // namespace op
+} // namespace vpu
+} // namespace ngraph
const NodeTypeInfo& get_type_info() const override { return type_info; }
- explicit StaticShapeNonZero(const Output<ngraph::Node>& input);
+ explicit StaticShapeNonZero(const Output<ngraph::Node>& input, const element::Type& output_type = element::i64);
void validate_and_infer_types() override;
std::shared_ptr<Node> copy_with_new_args(const NodeVector& new_args) const override;
bool visit_attributes(ngraph::AttributeVisitor& visitor) override;
+
+ bool evaluate(const HostTensorVector& output_values,
+ const HostTensorVector& input_values) override;
+
+ element::Type get_output_type() const { return m_output_type; }
+ void set_output_type(element::Type output_type) { m_output_type = output_type; }
+ // Overload collision with method on Node
+ using Node::set_output_type;
+
+protected:
+ element::Type m_output_type;
};
} // namespace op
class DynamicToStaticShape {
public:
explicit DynamicToStaticShape(const Transformations& specificTransformations = {});
- void transform(ngraph::Function& function) const;
+ void transform(std::shared_ptr<ngraph::Function> function) const;
private:
Transformations transformations;
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/node.hpp"
+
+#include <memory>
+
+namespace vpu {
+
+void dynamicToStaticShapeBroadcast(std::shared_ptr<ngraph::Node> target);
+
+} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/node.hpp"
+
+#include <memory>
+
+namespace vpu {
+
+void dynamicToStaticShapeConcat(std::shared_ptr<ngraph::Node> target);
+
+} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/node.hpp"
+
+#include <memory>
+
+namespace vpu {
+
+void dynamicToStaticShapeGather(std::shared_ptr<ngraph::Node> node);
+
+} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/node.hpp"
+
+#include <memory>
+
+namespace vpu {
+
+// Converts a dynamic Reshape into its static-shape equivalent.
+// Fix: parameter renamed from 'transpose' (copy-paste from the transpose
+// header) to 'target', matching the sibling declarations.
+void dynamicToStaticShapeReshape(std::shared_ptr<ngraph::Node> target);
+
+} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace vpu {
+
+class DynamicToStaticShapeShapeOf : public ngraph::pass::GraphRewrite {
+public:
+ DynamicToStaticShapeShapeOf();
+};
+
+}  // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/node.hpp"
+
+#include <memory>
+
+namespace vpu {
+
+void dynamicToStaticShapeVariadicSplit(std::shared_ptr<ngraph::Node> node);
+
+} // namespace vpu
#include <string>
#include <utility>
+#include "error.hpp"
+#include <vpu/utils/optional.hpp>
#include <vpu/utils/small_vector.hpp>
//
namespace vpu {
-class SimpleMathExpression final {
+// Parses the whole of 's' as a value of type T (e.g. int or float). Returns an
+// engaged Optional only when the entire string — allowing trailing whitespace —
+// is consumed; otherwise an empty Optional.
+// Fix: the return type was Optional<int> for every T, which silently truncated
+// floating-point results (parseNumber<float>("3.5") yielded 3) and broke the
+// float branch of IntOrFloat(const std::string&).
+template <typename T>
+Optional<T> parseNumber(const std::string& s) {
+    T value;
+    if ((std::istringstream(s) >> value >> std::ws).eof()) {
+        return {value};
+    }
+    return {};
+}
+
+namespace details {
+
+// Generates an arithmetic operator for IntOrFloat: integer arithmetic when both
+// operands hold ints, otherwise both sides are promoted to float first.
+#define OPERATOR(OP) \
+    IntOrFloat operator OP(const IntOrFloat &other) const { \
+        if (isInt && other.isInt) { \
+            return IntOrFloat{value.i OP other.value.i}; \
+        } \
+        const float lhs = isInt ? value.i : value.f; \
+        const float rhs = other.isInt ? other.value.i : other.value.f; \
+        return IntOrFloat{lhs OP rhs}; \
+    }
+
+class IntOrFloat final {
+ union {
+ int i;
+ float f;
+ } value{};
+ bool isInt = true;
+
public:
- void setVariables(const std::map<char, int>& vars) { _vars = vars; }
+ explicit IntOrFloat(int x) : isInt{true} {
+ value.i = x;
+ }
+ explicit IntOrFloat(float x) : isInt{false} {
+ value.f = x;
+ }
+ explicit IntOrFloat(const std::string& x) {
+ const auto integer = parseNumber<int>(x);
+ if (integer.hasValue()) {
+ *this = IntOrFloat(integer.get());
+ return;
+ }
+ const auto fp = parseNumber<float>(x);
+ if (fp.hasValue()) {
+ *this = IntOrFloat(fp.get());
+ return;
+ }
+ VPU_THROW_FORMAT("Failed to convert string to number: '%s'", x);
+ }
- void parse(const std::string& expression);
+ explicit operator std::string() const {
+ return isInt ? std::to_string(value.i) : std::to_string(value.f);
+ }
+
+ float toFloat() const { return isInt ? static_cast<float>(value.i) : value.f; }
+ OPERATOR(+)
+ OPERATOR(-)
+ OPERATOR(*)
+ OPERATOR(/)
+
+ IntOrFloat operator %(const IntOrFloat & other) const {
+ if (isInt && other.isInt) {
+ return IntOrFloat{value.i % other.value.i};
+ }
+ THROW_IE_EXCEPTION << "Can't apply modulus operation to floating point value";
+ }
+};
+
+} // namespace details
+
+class MathExpression final {
+public:
+ void setVariables(const std::map<std::string, std::string>& variables) {
+ for (const auto& var : variables) {
+ // if string converts to float, it also will be able to convert to int
+ if (parseNumber<float>(var.second).hasValue()) {
+ _vars.emplace(var.first, details::IntOrFloat{var.second});
+ }
+ }
+ }
+
+ void parse(const std::string& expression);
int evaluate() const;
private:
- struct Token final {
- enum TokenType {
- Value,
- Operator,
- };
+ enum class TokenType {
+ Value,
+ Operator,
+ Function
+ };
+ struct Token {
TokenType type;
- int value;
- char op;
+ details::IntOrFloat value;
+ std::string opName;
- explicit Token(TokenType t = Value, int v = 0, char o = 0) : type(t), value(v), op(o) {}
+ explicit Token(TokenType type, details::IntOrFloat value, std::string name)
+ : type(type), value(value), opName(std::move(name)) {}
};
-private:
- std::map<char, int> _vars;
+ std::map<std::string, details::IntOrFloat> _vars;
SmallVector<Token> _parsedTokens;
};
const auto& dataElementType = get_input_element_type(0);
NODE_VALIDATION_CHECK(this, dataElementType.is_static(), "(", get_friendly_name(), ") does not support dynamic element type for data tensor");
const auto& dimsElementType = get_input_element_type(1);
- NODE_VALIDATION_CHECK(this, dimsElementType.is_static() && dimsElementType.compatible(ngraph::element::i64), "(", get_friendly_name(),
- ") supports only i64 number type for dims tensor, but ", dimsElementType, " provided");
+ NODE_VALIDATION_CHECK(this, dimsElementType.is_static() && (dimsElementType.compatible(ngraph::element::i64) ||
+ dimsElementType.compatible(ngraph::element::i32)),
+ "(", get_friendly_name(), ") supports only i64 and i32 number type for dims tensor, but ", dimsElementType, " provided");
const auto& dataShape = get_input_shape(0);
const auto& dimsShape = get_input_shape(1);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/operations/out_shape_of_reshape.hpp"
+
+namespace ngraph { namespace vpu { namespace op {
+
+constexpr NodeTypeInfo OutShapeOfReshape::type_info;
+
+// Constructs the op from the input-data shape tensor and the output-shape
+// descriptor tensor; validation happens in validate_and_infer_types().
+OutShapeOfReshape::OutShapeOfReshape(
+        const Output<Node>& inDataShape,
+        const Output<Node>& outShapeDescriptor,
+        bool specialZero) : Op({inDataShape, outShapeDescriptor}), m_specialZero(specialZero) {
+    constructor_validate_and_infer_types();
+}
+
+// Validates that both inputs are static, rank-1, integral tensors, then sets
+// the output to an i64 tensor of the same shape as the descriptor input.
+void OutShapeOfReshape::validate_and_infer_types() {
+    NODE_VALIDATION_CHECK(this, get_input_size() == 2,
+                          "OutShapeOfReshape (", get_friendly_name(),
+                          ") must have only 2 inputs, provided: ", get_input_size());
+
+    // Input 0: shape of the data to be reshaped — must be a static 1D tensor.
+    const auto& inDataShapeTensorShape = get_input_partial_shape(0);
+    NODE_VALIDATION_CHECK(this, inDataShapeTensorShape.is_static(),
+                          "OutShapeOfReshape (", get_friendly_name(),
+                          ") doesn't support dynamic input data shape");
+    NODE_VALIDATION_CHECK(this, inDataShapeTensorShape.rank().get_length() == 1,
+                          "OutShapeOfReshape (", get_friendly_name(),
+                          ") must have input data shape tensor with rank 1, provided: ",
+                          inDataShapeTensorShape.rank().get_length());
+
+    // Input 1: requested output-shape descriptor — must be a static 1D tensor.
+    const auto& outShapeDescriptorTensorShape = get_input_partial_shape(1);
+    NODE_VALIDATION_CHECK(this, outShapeDescriptorTensorShape.is_static(),
+                          "OutShapeOfReshape (", get_friendly_name(),
+                          ") doesn't support dynamic output shape descriptor");
+    NODE_VALIDATION_CHECK(this, outShapeDescriptorTensorShape.rank().get_length() == 1,
+                          "OutShapeOfReshape (", get_friendly_name(),
+                          ") must have output shape descriptor tensor with rank 1, provided: ",
+                          outShapeDescriptorTensorShape.rank().get_length());
+
+    // Both inputs must carry integral element types (they hold dimension values).
+    const auto& inDataShapeTensorType = get_input_element_type(0);
+    NODE_VALIDATION_CHECK(this,
+                          inDataShapeTensorType.is_static() &&
+                          inDataShapeTensorType.is_integral_number(),
+                          "OutShapeOfReshape (", get_friendly_name(),
+                          ") input data type needs to be an integral type. Got: ",
+                          inDataShapeTensorType);
+    const auto& outShapeDescriptorTensorType = get_input_element_type(1);
+    NODE_VALIDATION_CHECK(this,
+                          outShapeDescriptorTensorType.is_static() &&
+                          outShapeDescriptorTensorType.is_integral_number(),
+                          "OutShapeOfReshape (", get_friendly_name(),
+                          ") shape descriptor type needs to be an integral type. Got: ",
+                          outShapeDescriptorTensorType);
+
+    // The resolved shape has one entry per descriptor element; always emitted as i64.
+    set_output_type(0, element::i64, outShapeDescriptorTensorShape);
+}
+
+// Clones the op onto new inputs, preserving the special_zero attribute.
+std::shared_ptr<Node> OutShapeOfReshape::clone_with_new_inputs(const OutputVector& new_args) const {
+    check_new_args_count(this, new_args);
+    return std::make_shared<OutShapeOfReshape>(new_args.at(0), new_args.at(1), m_specialZero);
+}
+
+// Serializes the single attribute of this op ('special_zero').
+bool OutShapeOfReshape::visit_attributes(ngraph::AttributeVisitor& visitor) {
+    visitor.on_attribute("special_zero", m_specialZero);
+    return true;
+}
+
+
+} // namespace op
+} // namespace vpu
+} // namespace ngraph
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/operations/static_shape_broadcast.hpp"
+
+#include "vpu/utils/error.hpp"
+
+#include "ngraph/opsets/opset3.hpp"
+#include "ngraph/evaluator.hpp"
+
+namespace ngraph { namespace vpu { namespace op {
+
+namespace {
+
+// Evaluator handler for ShapeOf: runs ShapeOf::evaluate into a freshly
+// allocated host tensor and returns it. The input tensors argument is unused —
+// ShapeOf only needs the static shape of its input value.
+// NOTE(review): the bool returned by evaluate() is ignored; a failure would
+// only surface later via the get_is_allocated() check in evaluateTargetShape.
+HostTensorVector evaluateShapeOf(Node* node, const HostTensorVector&) {
+    auto shapeOf = as_type<opset3::ShapeOf>(node);
+    const auto inputValue = shapeOf->input_value(0);
+    const auto outputValue = shapeOf->output(0);
+    const auto inputTensors =
+            HostTensorVector{std::make_shared<runtime::HostTensor>(inputValue)};
+    const auto outputTensors =
+            HostTensorVector{std::make_shared<runtime::HostTensor>(outputValue)};
+
+    shapeOf->evaluate(outputTensors, inputTensors);
+    return outputTensors;
+}
+
+// Evaluator handler for Constant: wraps a copy of the constant's data into a
+// host tensor (no actual computation needed).
+HostTensorVector evaluateConstant(Node* node, const HostTensorVector&) {
+    const auto constantNode = as_type<opset3::Constant>(node);
+    const auto constant = std::make_shared<opset3::Constant>(*constantNode);
+
+    const auto outputTensor = std::make_shared<runtime::HostTensor>(constant);
+
+    return {outputTensor};
+}
+
+// Generic evaluator handler: allocates one host tensor per node output and
+// delegates to Node::evaluate (used for Gather and Concat below).
+// NOTE(review): like evaluateShapeOf, the evaluate() return value is ignored.
+HostTensorVector evaluateOp(Node* node, const HostTensorVector& inputTensors) {
+    HostTensorVector outputTensors;
+    for (const auto& output : node->outputs()) {
+        outputTensors.push_back(std::make_shared<HostTensor>(output));
+    }
+
+    node->evaluate(outputTensors, inputTensors);
+    return outputTensors;
+}
+
+// Attempts to statically evaluate the subgraph producing 'value' (the target
+// shape) with a constant-folding evaluator limited to ShapeOf, Constant,
+// Gather and Concat. Returns PartialShape::dynamic() when the subgraph
+// contains other ops or evaluation fails (unallocated result tensor).
+PartialShape evaluateTargetShape(const Output<Node>& value) {
+    static Evaluator<HostTensorPtr>::op_handler_map handlers = {
+            {opset3::ShapeOf::type_info, evaluateShapeOf},
+            {opset3::Constant::type_info, evaluateConstant},
+            {opset3::Gather::type_info, evaluateOp},
+            {opset3::Concat::type_info, evaluateOp}};
+    Evaluator<HostTensorPtr>::value_map value_map;
+    Evaluator<HostTensorPtr> evaluator(handlers, value_map);
+
+    const auto shapeTensor = evaluator.evaluate(value);
+    if (!shapeTensor || !shapeTensor->get_is_allocated()) {
+        return PartialShape::dynamic();
+    }
+    // Materialize the evaluated tensor as a Constant to read it back as dims.
+    const auto shapeConstNode = std::make_shared<opset3::Constant>(shapeTensor);
+    const auto resultShape = Shape{shapeConstNode->cast_vector<size_t>()};
+
+    return resultShape;
+}
+
+} // namespace
+
+constexpr NodeTypeInfo StaticShapeBroadcast::type_info;
+
+// Explicit-mode constructor (arg, target shape, axes mapping). The evaluated
+// output shape starts dynamic and is resolved in validate_and_infer_types.
+StaticShapeBroadcast::StaticShapeBroadcast(const Output<Node>& arg,
+                                           const Output<Node>& targetShape,
+                                           const Output<Node>& axesMapping,
+                                           const ngraph::op::BroadcastModeSpec& broadcastSpec)
+        : ::ngraph::op::util::BroadcastBase{arg, targetShape, axesMapping, broadcastSpec},
+          m_evaluatedOutputShape{PartialShape::dynamic()} {
+    constructor_validate_and_infer_types();
+}
+
+// Numpy-mode constructor (arg, target shape only).
+StaticShapeBroadcast::StaticShapeBroadcast(const Output<Node>& arg,
+                                           const Output<Node>& targetShape,
+                                           const ngraph::op::BroadcastModeSpec& broadcastSpec)
+        : ::ngraph::op::util::BroadcastBase{arg, targetShape, broadcastSpec},
+          m_evaluatedOutputShape{PartialShape::dynamic()} {
+    constructor_validate_and_infer_types();
+}
+
+// Checks the input count against the broadcast mode (3 for EXPLICIT, 2 for
+// NUMPY), runs the base-class validation, and — if the inferred output shape is
+// still dynamic — evaluates the target-shape subgraph to pin a static shape.
+// Fails validation if no static shape can be determined.
+void StaticShapeBroadcast::validate_and_infer_types() {
+    if (m_mode.m_type == ngraph::op::BroadcastType::EXPLICIT) {
+        NODE_VALIDATION_CHECK(this, get_input_size() == 3,
+                              "StaticShapeBroadcast (", get_friendly_name(), ") ",
+                              "with explicit mode must have 3 inputs, provided: ",
+                              get_input_size());
+    } else if (m_mode.m_type == ngraph::op::BroadcastType::NUMPY) {
+        NODE_VALIDATION_CHECK(this, get_input_size() == 2,
+                              "StaticShapeBroadcast (", get_friendly_name(), ") ",
+                              "with numpy mode must have 2 inputs, provided: ",
+                              get_input_size());
+    } else {
+        NODE_VALIDATION_CHECK(this, false,
+                              "StaticShapeBroadcast (", get_friendly_name(), ") ",
+                              "doesn't support ", m_mode.m_type, " mode");
+    }
+
+    ::ngraph::op::util::BroadcastBase::validate_and_infer_types();
+
+    if (get_output_partial_shape(0).is_dynamic()) {
+        // Try to evaluate the output shape. After some further transformations we
+        // may no longer be able to evaluate the target shape; in that case the
+        // previously evaluated shape is kept unchanged. For example,
+        // DynamicToStaticShapeShapeOf removes ShapeOf and passes the second input of DSR.
+        const auto evaluatedTargetShape = evaluateTargetShape(input_value(1));
+        if (evaluatedTargetShape.is_static()) {
+            m_evaluatedOutputShape = evaluatedTargetShape;
+        }
+        NODE_VALIDATION_CHECK(this, m_evaluatedOutputShape.is_static(),
+                              "StaticShapeBroadcast (", get_friendly_name(), ") ",
+                              "can't evaluate output shape, got: ", m_evaluatedOutputShape);
+        set_output_type(0, get_input_element_type(0), m_evaluatedOutputShape);
+    }
+}
+
+// Clones the broadcast preserving the mode; 2 new inputs select the numpy-form
+// constructor, 3 the explicit-form one.
+std::shared_ptr<Node> StaticShapeBroadcast::clone_with_new_inputs(const OutputVector& newInputs) const {
+    check_new_args_count(this, newInputs);
+    if (newInputs.size() == 2) {
+        return std::make_shared<StaticShapeBroadcast>(
+                newInputs.at(0), newInputs.at(1), m_mode);
+    } else {
+        return std::make_shared<StaticShapeBroadcast>(
+                newInputs.at(0), newInputs.at(1), newInputs.at(2), m_mode);
+    }
+}
+
+// Serializes the broadcast mode as a string attribute. Only "explicit" and
+// "numpy" are producible here — validate_and_infer_types rejects other modes —
+// so the empty-string fallthrough is unreachable in a validated node.
+bool StaticShapeBroadcast::visit_attributes(ngraph::AttributeVisitor& visitor) {
+    std::string mode;
+    if (m_mode.m_type == ngraph::op::BroadcastType::EXPLICIT) {
+        mode = "explicit";
+    } else if (m_mode.m_type == ngraph::op::BroadcastType::NUMPY) {
+        mode = "numpy";
+    }
+    visitor.on_attribute("mode", mode);
+
+    return true;
+}
+
+} // namespace op
+} // namespace vpu
+} // namespace ngraph
#include "vpu/ngraph/operations/static_shape_nonzero.hpp"
+#include "ngraph/runtime/host_tensor.hpp"
+
namespace ngraph { namespace vpu { namespace op {
constexpr NodeTypeInfo StaticShapeNonZero::type_info;
-StaticShapeNonZero::StaticShapeNonZero(const Output<Node>& input)
- : Op({input}) {
+StaticShapeNonZero::StaticShapeNonZero(const Output<Node>& input, const element::Type& output_type)
+ : Op({input}), m_output_type(output_type) {
constructor_validate_and_infer_types();
}
"StaticShapeNonZero input data type needs to be a numeric type. Got: ",
input_et);
+ NODE_VALIDATION_CHECK(this,
+ m_output_type == element::i32 || m_output_type == element::i64,
+ "StaticShapeNonZero output data type can be either i32 or i64");
+
const auto total_dim_size = Dimension(shape_size(arg_shape.to_shape()));
- set_output_type(0, element::i64, {arg_shape.rank(), total_dim_size});
- set_output_type(1, element::i64, {Dimension(2)});
+ set_output_type(0, m_output_type, {arg_shape.rank(), total_dim_size});
+ set_output_type(1, m_output_type, {Dimension(2)});
}
std::shared_ptr<Node> StaticShapeNonZero::copy_with_new_args(
const NodeVector& new_args) const {
check_new_args_count(this, new_args);
- return std::make_shared<StaticShapeNonZero>(new_args.at(0));
+ return std::make_shared<StaticShapeNonZero>(new_args.at(0), m_output_type);
}
bool StaticShapeNonZero::visit_attributes(ngraph::AttributeVisitor& visitor) {
+ visitor.on_attribute("output_type", m_output_type);
return true;
}
+namespace {
+
+template <typename InType, typename OutType>
+void staticShapeNonZeroReference(const InType* input, OutType* outIndices, OutType* outShape, const Shape& inputShape) {
+ auto strides = row_major_strides(inputShape);
+ auto totalDimSize = shape_size(inputShape);
+
+ const auto getCoord = [&strides](int offset){
+ std::vector<size_t> coord;
+ for (const size_t& stride : strides) {
+ coord.insert(coord.begin(), offset / stride);
+ offset %= stride;
+ }
+
+ return coord;
+ };
+
+ const auto addCoordToIndices = [&outIndices, &totalDimSize](const std::vector<size_t> &coord,
+ size_t nonZeroCount) {
+ for (int j = 0; j < coord.size(); ++j) {
+ outIndices[j * totalDimSize + nonZeroCount] = coord[j];
+ }
+ };
+
+ const InType zeroValue = InType{0};
+ const auto isNonZero = [&input, &zeroValue](size_t i) {
+ return input[i] != zeroValue;
+ };
+
+ size_t nonZeroCount = 0;
+ for (size_t i = 0; i < totalDimSize; ++i) {
+ if (isNonZero(i)) {
+ addCoordToIndices(getCoord(i), nonZeroCount++);
+ }
+ }
+
+ outShape[0] = nonZeroCount;
+ outShape[1] = inputShape.size();
+}
+
+// Dispatches the reference implementation on the output element type (i32 or
+// i64, matching the op's output-type validation); the input element type is
+// fixed by the template parameter. Returns false for other output types.
+template <element::Type_t InType>
+bool evaluate(const HostTensorPtr& input,
+              const HostTensorPtr& outIndices,
+              const HostTensorPtr& outShape) {
+    bool rc = true;
+
+    switch (outIndices->get_element_type()) {
+        case element::Type_t::i64:
+            staticShapeNonZeroReference(input->get_data_ptr<InType>(),
+                                        outIndices->get_data_ptr<element::Type_t::i64>(),
+                                        outShape->get_data_ptr<element::Type_t::i64>(),
+                                        input->get_shape());
+            break;
+        case element::Type_t::i32:
+            staticShapeNonZeroReference(input->get_data_ptr<InType>(),
+                                        outIndices->get_data_ptr<element::Type_t::i32>(),
+                                        outShape->get_data_ptr<element::Type_t::i32>(),
+                                        input->get_shape());
+            break;
+        default: rc = false; break;
+    }
+
+    return rc;
+}
+
+// Dispatches on the input element type over all supported numeric types and
+// returns false for unsupported ones.
+// NOTE(review): TYPE_CASE is an ngraph helper macro (defined elsewhere) that
+// presumably expands to 'case element::Type_t::X: rc = evaluate<...>' — confirm.
+bool evaluateStaticShapeNonZero(const HostTensorPtr& input,
+                                const HostTensorPtr& outIndices,
+                                const HostTensorPtr& outShape) {
+    bool rc = true;
+
+    switch (input->get_element_type()) {
+        TYPE_CASE(i8)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(i16)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(i32)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(i64)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(u8)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(u16)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(u32)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(u64)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(bf16)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(f32)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(f64)(input, outIndices, outShape);
+        break;
+        default: rc = false; break;
+    }
+
+    return rc;
+}
+
+} // namespace
+
+// Host evaluation entry point: inputs[0] is the data tensor; outputs[0]
+// receives the indices, outputs[1] the 2-element shape tensor. Returns false
+// for unsupported element types. No size/arity checks are performed here.
+bool StaticShapeNonZero::evaluate(const HostTensorVector& outputs,
+                                  const HostTensorVector& inputs) {
+    return evaluateStaticShapeNonZero(inputs[0], outputs[0], outputs[1]);
+}
+
} // namespace op
} // namespace vpu
} // namespace ngraph
// SPDX-License-Identifier: Apache-2.0
//
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_broadcast.hpp"
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_concat.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_unary_elementwise.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_roialign.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_transpose.hpp"
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_variadic_split.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_non_max_suppression.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_nonzero.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_binary_elementwise.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_squeeze.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_unsqueeze.hpp"
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_gather.hpp"
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_shapeof.hpp"
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_reshape.hpp"
+
#include "vpu/utils/error.hpp"
#include "ngraph/opsets/opset3.hpp"
bool isDynamic(const Node& node) {
const auto& outputs = node.outputs();
- return std::any_of(outputs.cbegin(), outputs.cend(), [](const Output<const Node>& output) { return output.get_partial_shape().is_dynamic(); });
+ return std::any_of(outputs.cbegin(), outputs.cend(), [](const Output<const Node>& output) {
+ VPU_THROW_UNLESS(output.get_partial_shape().rank() != ngraph::Rank::dynamic(),
+ "DynamicToStaticShape transformation: got dynamic rank for {} with type {} while only static is supported",
+ output.get_node_shared_ptr()->get_friendly_name(), output.get_node_shared_ptr()->get_type_name());
+
+ return output.get_partial_shape().is_dynamic();
+ });
}
bool validateStaticShapes(const ngraph::Function& function) {
{ngraph::opset3::Add::type_info, dynamicToStaticShapeBinaryEltwise},
{ngraph::opset3::Multiply::type_info, dynamicToStaticShapeBinaryEltwise},
{ngraph::opset3::Subtract::type_info, dynamicToStaticShapeBinaryEltwise},
+ {ngraph::opset3::VariadicSplit::type_info, dynamicToStaticShapeVariadicSplit},
{ngraph::opset3::Divide::type_info, dynamicToStaticShapeBinaryEltwise},
{ngraph::opset3::Equal::type_info, dynamicToStaticShapeBinaryEltwise},
{ngraph::opset3::Power::type_info, dynamicToStaticShapeBinaryEltwise},
{ngraph::opset3::NonMaxSuppression::type_info, dynamicToStaticNonMaxSuppression},
{ngraph::opset3::NonZero::type_info, dynamicToStaticShapeNonZero},
{ngraph::opset3::Transpose::type_info, dynamicToStaticShapeTranspose},
+ {ngraph::opset3::Concat::type_info, dynamicToStaticShapeConcat},
{ngraph::opset3::Convert::type_info, dynamicToStaticUnaryElementwise},
{ngraph::opset3::Clamp::type_info, dynamicToStaticUnaryElementwise},
{ngraph::opset3::Floor::type_info, dynamicToStaticUnaryElementwise},
{ngraph::opset3::Sigmoid::type_info, dynamicToStaticUnaryElementwise},
{ngraph::opset3::Sqrt::type_info, dynamicToStaticUnaryElementwise},
{ngraph::opset3::Squeeze::type_info, dynamicToStaticShapeSqueeze},
+ {ngraph::opset3::Gather::type_info, dynamicToStaticShapeGather},
{ngraph::opset3::Unsqueeze::type_info, dynamicToStaticShapeUnsqueeze},
{ngraph::opset3::ROIAlign::type_info, dynamicToStaticShapeROIAlign},
+ {ngraph::opset3::Reshape::type_info, dynamicToStaticShapeReshape},
+ {ngraph::opset3::Broadcast::type_info, dynamicToStaticShapeBroadcast},
};
return transformations;
}
transformations.emplace(ngraph::opset3::Result::type_info, [](const std::shared_ptr<ngraph::Node>&){});
}
-void DynamicToStaticShape::transform(ngraph::Function& function) const {
- for (const auto& operation : function.get_ordered_ops()) {
+void DynamicToStaticShape::transform(std::shared_ptr<ngraph::Function> function) const {
+ for (const auto& operation : function->get_ordered_ops()) {
if (!isDynamic(*operation)) {
continue;
}
transformation->second(operation);
}
- function.validate_nodes_and_infer_types();
- validateStaticShapes(function);
+ // Should be executed after all dynamic-to-static transformations
+ DynamicToStaticShapeShapeOf().run_on_function(function);
+
+ function->validate_nodes_and_infer_types();
+ validateStaticShapes(*function);
}
} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_broadcast.hpp"
+
+#include "vpu/ngraph/operations/static_shape_broadcast.hpp"
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+#include "vpu/utils/error.hpp"
+
+#include "ngraph/graph_util.hpp"
+#include "ngraph/opsets/opset3.hpp"
+
+#include <memory>
+
+namespace vpu {
+
+void dynamicToStaticShapeBroadcast(std::shared_ptr<ngraph::Node> target) {
+ const auto broadcast = ngraph::as_type_ptr<ngraph::opset3::Broadcast>(target);
+ VPU_THROW_UNLESS(broadcast,
+ "dynamicToStaticShapeBroadcast transformation is not applicable for {}, "
+ "it should be {} instead",
+ target, ngraph::opset3::Broadcast::type_info.name);
+
+ std::shared_ptr<ngraph::vpu::op::StaticShapeBroadcast> staticShapeBroadcast;
+ if (broadcast->get_broadcast_spec() == ngraph::op::BroadcastType::EXPLICIT) {
+ staticShapeBroadcast = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ broadcast->input_value(0),
+ broadcast->input_value(1),
+ broadcast->input_value(2));
+ } else if (broadcast->get_broadcast_spec() == ngraph::op::BroadcastType::NUMPY) {
+ staticShapeBroadcast = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ broadcast->input_value(0),
+ broadcast->input_value(1));
+ } else {
+        VPU_THROW_FORMAT("dynamicToStaticShapeBroadcast supports only explicit and numpy modes, "
+                         "provided {}", broadcast->get_broadcast_spec().m_type);
+ }
+
+ auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ staticShapeBroadcast->output(0), broadcast->input_value(1));
+
+ ngraph::replace_node(std::move(target), std::move(dsr));
+}
+
+} // namespace vpu
+
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_concat.hpp"
+
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+#include <vpu/utils/error.hpp>
+
+#include "ngraph/graph_util.hpp"
+#include "ngraph/opsets/opset3.hpp"
+
+#include <memory>
+#include <numeric>
+#include <utility>
+
+namespace vpu {
+
+void dynamicToStaticShapeConcat(std::shared_ptr<ngraph::Node> target) {
+ const auto inputs = target->input_values();
+
+ ngraph::OutputVector dsrInputs;
+ ngraph::OutputVector staticInputs;
+ for (const auto& input : inputs) {
+ const auto inputNode = input.get_node_shared_ptr();
+ if (ngraph::as_type_ptr<ngraph::vpu::op::DynamicShapeResolver>(inputNode)) {
+ dsrInputs.emplace_back(input);
+ } else {
+ staticInputs.emplace_back(input);
+ }
+ }
+
+ VPU_THROW_UNLESS(!dsrInputs.empty(),
+ "DynamicToStaticShape transformation for {} of type {} expects at least "
+ "one {} as input, actual types: {}", target->get_friendly_name(),
+ target->get_type_info().name, ngraph::vpu::op::DynamicShapeResolver::type_info.name,
+ std::accumulate(inputs.begin(), inputs.end(), std::string(), [](
+ const std::string& typesStr, const ngraph::Output<ngraph::Node>& input) {
+ return typesStr + input.get_node_shared_ptr()->get_type_info().name + ", ";
+ }));
+
+ const auto firstDSRInputNode = dsrInputs.front().get_node_shared_ptr();
+ const auto shapeDataType = firstDSRInputNode->input(1).get_element_type();
+ const auto dataRank = firstDSRInputNode->get_output_partial_shape(0).rank().get_length();
+ const auto axis = ngraph::as_type_ptr<ngraph::opset3::Concat>(target)->get_concatenation_axis();
+
+ const auto shapeToConstant = [&shapeDataType, &dataRank](const ngraph::Shape& shape) {
+ return ngraph::opset3::Constant::create(
+ shapeDataType, {static_cast<size_t>(dataRank)}, shape)->output(0);
+ };
+
+ const auto getShapeFromDSR = [&target, &shapeDataType](const ngraph::Output<ngraph::Node>& dsrOutput) {
+ const auto dsrNode = dsrOutput.get_node_shared_ptr();
+ const auto dsrShapeInputValue = dsrNode->input_value(1);
+ VPU_THROW_UNLESS(dsrShapeInputValue.get_element_type() == shapeDataType,
+ "DynamicToStaticShape transformation for {} of type {} expects input "
+ "shape with {} type from {} argument of type {}, provided {}",
+ target->get_friendly_name(), target->get_type_info().name,
+ shapeDataType, dsrNode->get_friendly_name(), dsrNode->get_type_info().name,
+ dsrShapeInputValue.get_element_type());
+ return dsrShapeInputValue;
+ };
+
+ const auto sumOfShapes = [](const ngraph::Output<ngraph::Node>& shape1,
+ const ngraph::Output<ngraph::Node>& shape2) {
+ const auto shapeAccumulatorOp = std::make_shared<ngraph::opset3::Add>(shape1, shape2);
+ return shapeAccumulatorOp->output(0);
+ };
+
+ const auto divideDimsByNumOfInputsExceptAxis = [&target, &dataRank, &axis,
+ &shapeDataType, &shapeToConstant](
+ const ngraph::Output<ngraph::Node>& shape) {
+ ngraph::Shape dividerValues(dataRank, target->get_input_size());
+ dividerValues[axis] = 1;
+ const auto divider = shapeToConstant(dividerValues);
+ const auto divide = std::make_shared<ngraph::opset3::Divide>(shape, divider);
+ return divide->output(0);
+ };
+
+ const auto getAdditionalShapeFromStatic = [&target, &dataRank, &axis](
+ const ngraph::OutputVector& staticInputs) {
+ ngraph::Shape accumulatedStaticShapeValue(dataRank, 0);
+ for (const auto& staticInput : staticInputs) {
+ const auto& staticInputPartialShape = staticInput.get_partial_shape();
+ VPU_THROW_UNLESS(staticInputPartialShape.is_static(),
+ "DynamicToStaticShape transformation for {} of type {} expects static "
+ "shape on inputs without DSR", target->get_friendly_name(),
+ target->get_type_info().name);
+ accumulatedStaticShapeValue[axis] += static_cast<size_t>(staticInputPartialShape[axis]);
+ }
+ return accumulatedStaticShapeValue;
+ };
+
+ auto accumulatedShape = getShapeFromDSR(dsrInputs.front());
+ for (size_t dsrInputIdx = 1; dsrInputIdx < dsrInputs.size(); ++dsrInputIdx) {
+ const auto dsrInputShape = getShapeFromDSR(dsrInputs[dsrInputIdx]);
+ accumulatedShape = sumOfShapes(accumulatedShape, dsrInputShape);
+ }
+
+ if (dsrInputs.size() > 1) {
+ accumulatedShape = divideDimsByNumOfInputsExceptAxis(accumulatedShape);
+ }
+
+ if (!staticInputs.empty()) {
+ const auto accumulatedStaticShape = shapeToConstant(getAdditionalShapeFromStatic(staticInputs));
+ accumulatedShape = sumOfShapes(accumulatedShape, accumulatedStaticShape);
+ }
+
+ const auto copied = target->clone_with_new_inputs(target->input_values());
+ const auto outDsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ copied, accumulatedShape);
+
+ ngraph::replace_node(std::move(target), outDsr);
+}
+
+} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_gather.hpp"
+
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+#include <vpu/utils/error.hpp>
+
+#include "ngraph/graph_util.hpp"
+#include "ngraph/opsets/opset3.hpp"
+
+#include <memory>
+#include <numeric>
+
+namespace vpu {
+
+void dynamicToStaticShapeGather(std::shared_ptr<ngraph::Node> target) {
+ const auto gather = ngraph::as_type_ptr<ngraph::opset3::Gather>(target);
+ VPU_THROW_UNLESS(gather, "dynamicToStaticShapeGather transformation is not applicable for {}, it should be {} instead",
+ target, ngraph::opset3::Gather::type_info);
+
+ int64_t axis = gather->get_axis();
+ VPU_THROW_UNLESS(axis != std::numeric_limits<int64_t>::max() && axis >= 0,
+ "dynamicToStaticShapeGather: Unsupported Gather axis {} for node {}", axis, gather);
+
+ auto shapeToConstant = [&gather](const ngraph::Output<ngraph::Node> & output) -> std::shared_ptr<ngraph::opset3::Constant> {
+ VPU_THROW_UNLESS(output.get_partial_shape().is_static(),
+ "DynamicToStaticShape transformation for {} of type {} expects static shape on inputs without DSR",
+ gather->get_friendly_name(), gather->get_type_info());
+ return ngraph::opset3::Constant::create(ngraph::element::i64, {output.get_shape().size()}, output.get_shape());
+ };
+
+ const auto dataDSR = ngraph::as_type_ptr<ngraph::vpu::op::DynamicShapeResolver>(gather->input_value(0).get_node_shared_ptr());
+ const auto idxDSR = ngraph::as_type_ptr<ngraph::vpu::op::DynamicShapeResolver>(gather->input_value(1).get_node_shared_ptr());
+
+ VPU_THROW_UNLESS(dataDSR || idxDSR, "DynamicToStaticShape transformation for {} of type {} expects at least one DSR as input",
+ gather->get_friendly_name(), gather->get_type_info());
+
+ const auto data_shape = dataDSR ? dataDSR->input_value(1) : shapeToConstant(gather->input_value(0));
+ const auto indices_shape = idxDSR ? idxDSR->input_value(1) : shapeToConstant(gather->input_value(1));
+
+ const auto copied = target->clone_with_new_inputs(target->input_values());
+
+
+ const auto & data_rank = data_shape.get_partial_shape();
+ const auto & indices_rank = indices_shape.get_partial_shape();
+ VPU_THROW_UNLESS(data_rank.is_static() && indices_rank.is_static(),
+ "DynamicToStaticShape transformation for {} doesn't support dynamic rank", gather);
+
+ const auto data_rank_value = data_rank[0].get_length();
+ const auto indices_rank_value = indices_rank[0].get_length();
+ ngraph::OutputVector output_dims;
+ if (axis) {
+ std::vector<int64_t> first_data_shape_part_indices(axis);
+ std::iota(first_data_shape_part_indices.begin(), first_data_shape_part_indices.end(), 0);
+ const auto first_data_shape_part = std::make_shared<ngraph::opset3::Gather>(
+ data_shape,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {first_data_shape_part_indices.size()}, first_data_shape_part_indices),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0}));
+ output_dims.push_back(first_data_shape_part);
+ }
+ if (indices_rank_value)
+ output_dims.push_back(indices_shape);
+ if (axis + 1 < data_rank_value) {
+ std::vector<int64_t> second_data_shape_part_indices(data_rank_value - axis - 1);
+ std::iota(second_data_shape_part_indices.begin(), second_data_shape_part_indices.end(), axis + 1);
+ const auto second_data_shape_part = std::make_shared<ngraph::opset3::Gather>(
+ data_shape,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {second_data_shape_part_indices.size()}, second_data_shape_part_indices),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0}));
+ output_dims.push_back(second_data_shape_part);
+ }
+ const auto output_shape = std::make_shared<ngraph::opset3::Concat>(output_dims, 0);
+ ngraph::replace_node(target, std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(copied, output_shape));
+}
+
+} // namespace vpu
#include "vpu/ngraph/operations/static_shape_nonzero.hpp"
#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+#include "vpu/utils/error.hpp"
#include "ngraph/graph_util.hpp"
+#include "ngraph/ops.hpp"
#include <memory>
namespace vpu {
-void dynamicToStaticShapeNonZero(std::shared_ptr<ngraph::Node> nonZero) {
- auto staticShapeNonZero = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(nonZero->input(0).get_source_output());
+void dynamicToStaticShapeNonZero(std::shared_ptr<ngraph::Node> node) {
+ auto nonZero = std::dynamic_pointer_cast<ngraph::op::v3::NonZero>(node);
+ VPU_THROW_UNLESS(nonZero, "dynamicToStaticShapeNonZero transformation for {} of type {} expects {} as node for replacement",
+ node->get_friendly_name(), node->get_type_info(), ngraph::op::v3::NonZero::type_info);
+
+ auto staticShapeNonZero = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(nonZero->input(0).get_source_output(), nonZero->get_output_type());
staticShapeNonZero->set_friendly_name(nonZero->get_friendly_name() + "/static_shape");
auto dynamicShapeResolver = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_reshape.hpp"
+
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+#include "vpu/ngraph/operations/out_shape_of_reshape.hpp"
+#include <vpu/utils/error.hpp>
+
+#include "ngraph/graph_util.hpp"
+#include "ngraph/opsets/opset3.hpp"
+
+#include <memory>
+
+namespace vpu {
+
+void dynamicToStaticShapeReshape(std::shared_ptr<ngraph::Node> target) {
+ const auto dsr = target->get_argument(0);
+ VPU_THROW_UNLESS(ngraph::as_type_ptr<ngraph::vpu::op::DynamicShapeResolver>(dsr),
+ "DynamicToStaticShape transformation for {} of type {} expects {} as input with index {}",
+ target->get_friendly_name(), target->get_type_info(), ngraph::vpu::op::DynamicShapeResolver::type_info, 0);
+
+ const auto outShapeDescriptor = target->get_argument(1);
+ VPU_THROW_UNLESS(ngraph::as_type_ptr<ngraph::opset3::Constant>(outShapeDescriptor),
+                     "DynamicToStaticShape transformation for {} of type {} expects {} as input with index {}",
+ target->get_friendly_name(), target->get_type_info(), ngraph::opset3::Constant::type_info, 1);
+
+ const auto reshape = std::dynamic_pointer_cast<ngraph::opset3::Reshape>(target);
+ const auto copied = reshape->clone_with_new_inputs(target->input_values());
+ const auto inDataShape = dsr->input(1).get_source_output();
+
+ const auto outShapeOfReshape = std::make_shared<ngraph::vpu::op::OutShapeOfReshape>(
+ inDataShape, outShapeDescriptor, reshape->get_special_zero());
+
+ ngraph::replace_node(std::move(target), std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ copied, outShapeOfReshape));
+}
+
+} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_shapeof.hpp"
+
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+#include <vpu/utils/error.hpp>
+
+#include <ngraph/opsets/opset3.hpp>
+
+namespace vpu {
+
+DynamicToStaticShapeShapeOf::DynamicToStaticShapeShapeOf() : GraphRewrite() {
+ // We don't set strict_mode when use pattern Matcher,
+ // so we can set any type and shape for input.
+ auto inputWithAnyTypeAndShape = std::make_shared<ngraph::pattern::op::Label>(
+ ngraph::element::dynamic, ngraph::PartialShape{});
+ auto shapeOfPattern = std::make_shared<ngraph::opset3::ShapeOf>(inputWithAnyTypeAndShape);
+
+ ngraph::graph_rewrite_callback callback = [](ngraph::pattern::Matcher &m) {
+ auto shapeOfNode = std::dynamic_pointer_cast<ngraph::opset3::ShapeOf>(m.get_match_root());
+ if (!shapeOfNode) {
+ return false;
+ }
+
+ auto dsr = shapeOfNode->input_value(0).get_node_shared_ptr();
+ if (!ngraph::as_type_ptr<ngraph::vpu::op::DynamicShapeResolver>(dsr)) {
+ return false;
+ }
+
+ ngraph::replace_node(shapeOfNode, dsr->input_value(1).get_node_shared_ptr());
+
+ return true;
+ };
+
+ auto m = std::make_shared<ngraph::pattern::Matcher>(shapeOfPattern, "DynamicToStaticShapeShapeOf");
+ this->add_matcher(m, callback, ngraph::pass::PassProperty::CHANGE_DYNAMIC_STATE);
+}
+
+} // namespace vpu
const auto shape = dsr->input(1).get_source_output();
const auto axis = std::make_shared<ngraph::opset3::Constant>(
- ngraph::element::u64,
+ ngraph::element::i64,
ngraph::Shape{std::initializer_list<std::size_t>{1}},
- std::vector<std::size_t>{0});
+ std::vector<std::int64_t>{0});
const auto scatterElementsUpdate = std::make_shared<ngraph::opset3::ScatterElementsUpdate>(shape, transposition, shape, axis);
ngraph::replace_node(std::move(target), std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(copied, scatterElementsUpdate));
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_variadic_split.hpp"
+
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+#include <vpu/utils/error.hpp>
+
+#include "ngraph/graph_util.hpp"
+#include "ngraph/opsets/opset3.hpp"
+
+#include <memory>
+#include <numeric>
+#include <ngraph/validation_util.hpp>
+
+namespace vpu {
+
+void dynamicToStaticShapeVariadicSplit(std::shared_ptr<ngraph::Node> target) {
+ const auto dsr = ngraph::as_type_ptr<ngraph::vpu::op::DynamicShapeResolver>(target->input_value(0).get_node_shared_ptr());
+ VPU_THROW_UNLESS(dsr, "DynamicToStaticShape transformation for {} of type {} expects {} as input with index {}",
+ target->get_friendly_name(), target->get_type_info(), ngraph::vpu::op::DynamicShapeResolver::type_info, 0);
+
+ const auto axis_node = ngraph::as_type_ptr<ngraph::opset3::Constant>(target->input_value(1).get_node_shared_ptr());
+    VPU_THROW_UNLESS(axis_node, "dynamicToStaticShapeVariadicSplit transformation is not applicable for {}, dynamic axis is not supported", target);
+
+ const auto data_rank = target->get_input_partial_shape(0).rank();
+    VPU_THROW_UNLESS(data_rank.is_static(), "dynamicToStaticShapeVariadicSplit transformation for {} doesn't support dynamic rank", target);
+
+ int64_t axis = ngraph::normalize_axis(target->description(), axis_node->cast_vector<int64_t>()[0], data_rank);
+
+ const auto split_lengths_node = ngraph::as_type_ptr<ngraph::opset3::Constant>(target->input_value(2).get_node_shared_ptr());
+    VPU_THROW_UNLESS(split_lengths_node, "dynamicToStaticShapeVariadicSplit transformation is not applicable for {}, dynamic split_length is not supported", target);
+ const auto split_lengths = split_lengths_node->cast_vector<int64_t>();
+
+ for (const auto & i : split_lengths) {
+        VPU_THROW_UNLESS(i != -1, "dynamicToStaticShapeVariadicSplit transformation is not applicable for {}, split_length with -1 is not supported", target);
+        VPU_THROW_UNLESS(i > 0, "dynamicToStaticShapeVariadicSplit transformation is not applicable for {}, non-positive split_length is not supported", target);
+ }
+
+ const auto data_shape = dsr->input_value(1).get_node_shared_ptr();
+ const auto copied = target->clone_with_new_inputs(target->input_values());
+ const auto data_rank_value = data_rank.get_length();
+ ngraph::OutputVector first_shape_part, second_shape_part;
+ if (axis) {
+ std::vector<int64_t> first_data_shape_part_indices(axis);
+ std::iota(first_data_shape_part_indices.begin(), first_data_shape_part_indices.end(), 0);
+ const auto first_data_shape_part = std::make_shared<ngraph::opset3::Gather>(
+ data_shape,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {first_data_shape_part_indices.size()}, first_data_shape_part_indices),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0}));
+ first_shape_part.push_back(first_data_shape_part);
+ }
+ if (axis + 1 < data_rank_value) {
+ std::vector<int64_t> second_data_shape_part_indices(data_rank_value - axis - 1);
+ std::iota(second_data_shape_part_indices.begin(), second_data_shape_part_indices.end(), axis + 1);
+ const auto second_data_shape_part = std::make_shared<ngraph::opset3::Gather>(
+ data_shape,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {second_data_shape_part_indices.size()}, second_data_shape_part_indices),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0}));
+ second_shape_part.push_back(second_data_shape_part);
+ }
+ for (auto i = 0; i < split_lengths.size(); ++i) {
+ const auto dim = ngraph::opset3::Constant::create(data_shape->get_element_type(), {1}, {split_lengths[i]});
+ if (!first_shape_part.empty() || !second_shape_part.empty()) {
+ ngraph::OutputVector output_dims{dim};
+ output_dims.insert(output_dims.begin(), first_shape_part.begin(), first_shape_part.end());
+ output_dims.insert(output_dims.end(), second_shape_part.begin(), second_shape_part.end());
+ const auto output_shape = std::make_shared<ngraph::opset3::Concat>(output_dims, 0);
+ target->output(i).replace(std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(copied->output(i), output_shape));
+ } else {
+ target->output(i).replace(std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(copied->output(i), dim));
+ }
+ }
+}
+
+} // namespace vpu
#include <set>
#include <stack>
#include <map>
-#include <stdexcept>
-#include <utility>
#include <functional>
#include <vpu/utils/error.hpp>
namespace {
-const std::set<char> whitespaces = {
- ' ',
- '\t',
+using ValueType = details::IntOrFloat;
+
+struct Operator {
+ int priority;
+ std::function<ValueType(ValueType, ValueType)> op;
};
-// priority, function
-using Operator = std::pair<int, std::function<int(int, int)>>;
+static const std::map<std::string, Operator> operators = {
+ { "+", { 0, std::plus<ValueType>() }},
+ { "-", { 0, std::minus<ValueType>() }},
+ { "*", { 1, std::multiplies<ValueType>() }},
+ { "/", { 1, std::divides<ValueType>() }},
+ { "%", { 1, std::modulus<ValueType>() }}
+};
-const std::map<char, Operator> operators = {
- { '+', { 0, std::plus<int>() } },
- { '-', { 0, std::minus<int>() } },
- { '*', { 1, std::multiplies<int>() } },
- { '/', { 1, std::divides<int>() } },
- { '%', { 1, std::modulus<int>() } },
+static const std::map<std::string, std::function<ValueType(ValueType)>> function = {
+ {"floor", [](ValueType x) { return ValueType{std::floor(x.toFloat())}; }},
+ {"ceil" , [](ValueType x) { return ValueType{std::ceil(x.toFloat())}; }},
+ {"round", [](ValueType x) { return ValueType{std::round(x.toFloat())}; }},
+ {"abs" , [](ValueType x) { return ValueType{std::abs(x.toFloat())}; }},
+ {"sqrt" , [](ValueType x) { return ValueType{std::sqrt(x.toFloat())}; }}
};
+bool isFunction(const std::string& token) {
+ return function.find(token) != function.end();
+}
+bool isOperator(const std::string& token) {
+ return operators.find(token) != operators.end();
+}
+int opPriority(const std::string& token) {
+ return operators.at(token).priority;
+}
+
} // namespace
-void SimpleMathExpression::parse(const std::string& expression) {
+void MathExpression::parse(const std::string& expression) {
_parsedTokens.clear();
+ std::stack<std::string> tokenStack;
- std::stack<char> operatorStack;
-
- // While there are tokens to be read.
- for (size_t i = 0; i != expression.length(); i++) {
- // Ignore whitespaces;
- while (whitespaces.find(expression[i]) != whitespaces.end()) {
- i++;
+ for (auto it = begin(expression); it != end(expression); ++it) {
+ if (*it == ' ' || *it == '\t') {
+ continue;
}
- // Read a token.
- auto curr = expression[i];
-
- // If the token is a number, then push it to the output queue.
- if (std::isdigit(curr)) {
+ // parse number
+ if (std::isdigit(*it)) {
size_t len = 0;
- auto value = std::stoi(expression.substr(i), &len);
+ const auto value = std::stof(&*it, &len);
- _parsedTokens.emplace_back(Token(Token::Value, value, 0));
-
- i += (len - 1);
+ _parsedTokens.emplace_back(TokenType::Value, ValueType{value}, "");
+ std::advance(it, len - 1);
continue;
}
- // If the token is a variable, then push it's value to the output queue.
- if (_vars.find(curr) != _vars.end()) {
- _parsedTokens.emplace_back(Token(Token::Value, _vars.at(curr), 0));
+ // parse variable/function
+ if (std::isalpha(*it)) {
+ const auto end_token = std::find_if_not(it, end(expression),
+ [](char c) { return std::isalnum(c) || c == '_'; });
+ const auto token = std::string(it, end_token);
+ std::advance(it, token.length() - 1);
- continue;
+ if (isFunction(token)) {
+ tokenStack.push(token);
+ continue;
+ }
+ if (_vars.find(token) != _vars.end()) {
+ _parsedTokens.emplace_back(TokenType::Value, ValueType{_vars.at(token)}, "");
+ continue;
+ }
}
- // If the token is an operator, then:
- if (operators.find(curr) != operators.end()) {
- // While there is an operator at the top of the operator stack with
- // greater than or equal to precedence:
- // pop operators from the operator stack, onto the output queue;
- while (!operatorStack.empty() &&
- (operators.find(operatorStack.top()) != operators.end()) &&
- (operators.at(operatorStack.top()).first >= operators.at(curr).first)) {
- auto op = operatorStack.top();
- operatorStack.pop();
-
- _parsedTokens.emplace_back(Token(Token::Operator, 0, op));
+ // parse operator
+ if (isOperator(std::string(1, *it))) {
+ while (!tokenStack.empty()
+ && (isFunction(tokenStack.top())
+ || (isOperator(tokenStack.top())
+ && opPriority(tokenStack.top()) >= opPriority(std::string(1, *it))))) {
+ const auto tokenType = isOperator(tokenStack.top()) ? TokenType::Operator
+ : TokenType::Function;
+ _parsedTokens.emplace_back(tokenType, ValueType{0}, tokenStack.top());
+ tokenStack.pop();
}
- // push the read operator onto the operator stack.
- operatorStack.push(curr);
-
+ tokenStack.push(std::string(1, *it));
continue;
}
- // If the token is a left bracket (i.e. "("), then:
- // push it onto the operator stack.
- if (curr == '(') {
- operatorStack.push(curr);
-
+ if (*it == '(') {
+ tokenStack.push("(");
continue;
}
- // If the token is a right bracket (i.e. ")"), then:
- if (curr == ')') {
- // While the operator at the top of the operator stack is not a left bracket:
- // pop operators from the operator stack onto the output queue;
- while (!operatorStack.empty() &&
- operatorStack.top() != '(') {
- _parsedTokens.emplace_back(Token(Token::Operator, 0, operatorStack.top()));
-
- operatorStack.pop();
+ if (*it == ')') {
+ while (!tokenStack.empty() && tokenStack.top() != "(") {
+ const auto tokenType = isOperator(tokenStack.top()) ? TokenType::Operator
+ : TokenType::Function;
+ _parsedTokens.emplace_back(tokenType, ValueType{0}, tokenStack.top());
+ tokenStack.pop();
}
- // pop the left bracket from the stack.
- // If the stack runs out without finding a left bracket, then there are mismatched parentheses.
- if (!operatorStack.empty() &&
- operatorStack.top() == '(') {
- operatorStack.pop();
+ if (!tokenStack.empty()) {
+ tokenStack.pop();
} else {
VPU_THROW_EXCEPTION << "Mismatched parentheses in " << expression;
}
continue;
}
- // Unknown token
- VPU_THROW_EXCEPTION << "Unknown token " << curr << " in " << expression;
+ VPU_THROW_EXCEPTION << "Unknown token " << *it << " in " << expression;
}
- // If there are no more tokens to read:
- // while there are still operator tokens on the stack:
- // if the operator token on the top of the stack is a bracket, then
- // there are mismatched parentheses;
- // pop the operator onto the output queue.
- while (!operatorStack.empty()) {
- if (operatorStack.top() == '(') {
+ while (!tokenStack.empty()) {
+ if (tokenStack.top() == "(") {
VPU_THROW_EXCEPTION << "Mismatched parentheses in " << expression;
}
-
- _parsedTokens.emplace_back(Token(Token::Operator, 0, operatorStack.top()));
-
- operatorStack.pop();
+ const auto tokenType = isOperator(tokenStack.top()) ? TokenType::Operator
+ : TokenType::Function;
+ _parsedTokens.emplace_back(tokenType, ValueType{0}, tokenStack.top());
+ tokenStack.pop();
}
}
-int SimpleMathExpression::evaluate() const {
- std::stack<int> values;
- for (const auto& t : _parsedTokens) {
- switch (t.type) {
- case Token::Value:
- values.push(t.value);
- break;
- case Token::Operator: {
- if (values.size() < 2) {
- VPU_THROW_EXCEPTION << "Illegal expression: not enough values for operator evaluation";
- }
+int MathExpression::evaluate() const {
+ std::stack<ValueType> values;
- // pop last 2 values and apply operand
- auto val2 = values.top();
- values.pop();
+ for (const auto& token : _parsedTokens) {
+ switch (token.type) {
+ case TokenType::Value:
+ values.push(token.value);
+ break;
+ case TokenType::Operator: {
+ if (values.size() < 2) {
+ VPU_THROW_EXCEPTION << "Illegal expression: not enough values for operator evaluation";
+ }
- auto val1 = values.top();
- values.pop();
+ auto val2 = values.top();
+ values.pop();
- values.push(operators.at(t.op).second(val1, val2));
+ auto val1 = values.top();
+ values.pop();
- break;
- }
- default:
- VPU_THROW_EXCEPTION << "Illegal expression: unhandled token";
+ values.push(operators.at(token.opName).op(val1, val2));
+ break;
+ }
+ case TokenType::Function: {
+ if (values.empty()) {
+ VPU_THROW_EXCEPTION << "Illegal expression: not enough values for function evaluation";
+ }
+ auto val1 = values.top();
+ values.pop();
+
+ values.push(function.at(token.opName)(val1));
+ break;
+ }
+ default:
+ VPU_THROW_EXCEPTION << "Illegal expression: unhandled token";
}
}
VPU_THROW_EXCEPTION << "Illegal expression: not enough operators";
}
- return values.top();
+ return values.top().toFloat();
}
} // namespace vpu
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
ushort extract_weights(uchar val, int bit)
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void Convolution1x1_NCHW(
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void Convolution3x3(const __global half* in_param,
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+#define MAX_OPENCL_BUFF_SIZE 64*1024
+
+// Define if runtime supports it. MX runtime is compatible, KMB is in WIP state
+#define USE_MANUAL_DMA 1
+
+#if defined (USE_MANUAL_DMA)
+void dmacpyLineSrcStrideStart(global half* from, private half* to, int size, int src_width, int src_stride)
+{
+ item_dma_event_t copyEvent = WorkItemDmaCreateStrideTransaction(from, to, src_width, src_width, src_stride, src_width, size, 0);
+ WaitWorkItemDmaEvents(1, ©Event);
+}
+
+void dmacpyLineDstStrideStart(private half* from, global half* to, int size, int src_width, int src_stride)
+{
+ item_dma_event_t copyEvent = WorkItemDmaCreateStrideTransaction(from, to, src_width, src_width, src_width, src_stride, size, 0);
+ WaitWorkItemDmaEvents(1, ©Event);
+}
+#endif
+
+void memzero(void * ptr, size_t num)
+{
+ float4* line0_ = (float4*) ptr;
+ #pragma unroll 16
+ for (int i = 0; i < num/16; i++)
+ {
+ line0_[i] = (float4){0.f, 0.f, 0.f, 0.f};
+ }
+ uchar* ptr_ = (uchar*) ptr;
+ for (int i = num/16*16; i < num; i++)
+ {
+ ptr_[i] = 0;
+ }
+}
+
+void __attribute__((noinline)) crosscorrh(__private const half* restrict line0,
+ __private const half* restrict line1,
+ __private half* restrict dline,
+ int topwidth,
+ int max_displacement,
+ int neighborhood_grid_radius,
+ int kernel_size,
+ int padding,
+ int bottomwidth,
+ int stride1,
+ int stride2,
+ int max_channels,
+ int cur_subchannels)
+{
+ if (max_channels == 64)
+ {
+ for (int i = 0; i < kernel_size; i++)
+ {
+ int x1 = max_displacement - padding + i;
+ int offset1 = x1 >= 0 ? 0 : (-x1 + stride1 - 1)/stride1;
+ x1 += offset1*stride1;
+
+ for (int blockIdx_x = offset1; blockIdx_x < topwidth && x1 < bottomwidth; blockIdx_x++, x1 += stride1)
+ {
+ int x2 = x1 - neighborhood_grid_radius*stride2;
+ int offset2 = x2 >= 0 ? 0 : (-x2 + stride2 - 1)/stride2;
+ x2 += offset2*stride2;
+
+ for (int top_channel_x = offset2 - neighborhood_grid_radius;
+ top_channel_x <= neighborhood_grid_radius && x2 < bottomwidth;
+ top_channel_x++, x2 += stride2)
+ {
+ half8 sum4 = (half8){0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
+
+ half8* src0 = (half8*)(line0 + x1*max_channels);
+ half8* src1 = (half8*)(line1 + x2*max_channels);
+
+ #pragma unroll 8
+ for (int ch = 0; ch < max_channels/8; ch++)
+ sum4 += (src0[ch])*(src1[ch]);
+
+ half sum = __builtin_shave_sau_sumx_f16_r(sum4);
+ dline[(top_channel_x + neighborhood_grid_radius)*topwidth + blockIdx_x] += (sum);
+ }
+ }
+ }
+ }
+ else
+ {
+ int neighborhood_grid_width = 2*neighborhood_grid_radius + 1;
+
+ for (int blockIdx_x = 0; blockIdx_x < topwidth; blockIdx_x++)
+ {
+ for (int i = 0; i < kernel_size; i++)
+ {
+ int x1 = blockIdx_x*stride1 + max_displacement + i - padding;
+
+ if ((x1 >= 0) && (x1 < bottomwidth))
+ {
+ int o_min = - neighborhood_grid_radius*stride2;
+ int o_max = neighborhood_grid_width*stride2 - neighborhood_grid_radius*stride2;
+ if ((o_min) < ( - x1)) o_min -= ((x1 + o_min - (stride2 - 1))/stride2)*stride2;
+ if ((o_max) >= (bottomwidth+stride2 - x1)) o_max -= ((x1 + o_max - bottomwidth )/stride2)*stride2;
+
+ int o = o_min;
+ for (; o <= o_max - 4*stride2; o += 4*stride2)
+ {
+ half8* bottom0 = (half8*)(line0 + x1*max_channels);
+ half8* bottom1_0 = (half8*)(line1 + (x1 + o + 0*stride2)*max_channels);
+ half8* bottom1_1 = (half8*)(line1 + (x1 + o + 1*stride2)*max_channels);
+ half8* bottom1_2 = (half8*)(line1 + (x1 + o + 2*stride2)*max_channels);
+ half8* bottom1_3 = (half8*)(line1 + (x1 + o + 3*stride2)*max_channels);
+
+ int c = 0;
+
+ half8 sum40 = 0;
+ half8 sum41 = 0;
+ half8 sum42 = 0;
+ half8 sum43 = 0;
+
+ for (; c <= cur_subchannels/8 - 4; c += 4)
+ {
+ sum40 += bottom0[c + 0] * bottom1_0[c + 0];
+ sum40 += bottom0[c + 1] * bottom1_0[c + 1];
+ sum40 += bottom0[c + 2] * bottom1_0[c + 2];
+ sum40 += bottom0[c + 3] * bottom1_0[c + 3];
+
+ sum41 += bottom0[c + 0] * bottom1_1[c + 0];
+ sum41 += bottom0[c + 1] * bottom1_1[c + 1];
+ sum41 += bottom0[c + 2] * bottom1_1[c + 2];
+ sum41 += bottom0[c + 3] * bottom1_1[c + 3];
+
+ sum42 += bottom0[c + 0] * bottom1_2[c + 0];
+ sum42 += bottom0[c + 1] * bottom1_2[c + 1];
+ sum42 += bottom0[c + 2] * bottom1_2[c + 2];
+ sum42 += bottom0[c + 3] * bottom1_2[c + 3];
+
+ sum43 += bottom0[c + 0] * bottom1_3[c + 0];
+ sum43 += bottom0[c + 1] * bottom1_3[c + 1];
+ sum43 += bottom0[c + 2] * bottom1_3[c + 2];
+ sum43 += bottom0[c + 3] * bottom1_3[c + 3];
+ }
+
+ for (; c < cur_subchannels/8; c++)
+ {
+ sum40 += bottom0[c] * bottom1_0[c];
+ sum41 += bottom0[c] * bottom1_1[c];
+ sum42 += bottom0[c] * bottom1_2[c];
+ sum43 += bottom0[c] * bottom1_3[c];
+ }
+
+ half sum0 = __builtin_shave_sau_sumx_f16_r(sum40);
+ half sum1 = __builtin_shave_sau_sumx_f16_r(sum41);
+ half sum2 = __builtin_shave_sau_sumx_f16_r(sum42);
+ half sum3 = __builtin_shave_sau_sumx_f16_r(sum43);
+
+ for (c = c*8; c < cur_subchannels; c++)
+ {
+ sum0 += line0[x1*max_channels + c] * line1[(x1 + o + 0*stride2)*max_channels + c];
+ sum1 += line0[x1*max_channels + c] * line1[(x1 + o + 1*stride2)*max_channels + c];
+ sum2 += line0[x1*max_channels + c] * line1[(x1 + o + 2*stride2)*max_channels + c];
+ sum3 += line0[x1*max_channels + c] * line1[(x1 + o + 3*stride2)*max_channels + c];
+ }
+
+ dline[blockIdx_x + (((o/stride2) + 0)*topwidth + neighborhood_grid_radius*topwidth)] += sum0;
+ dline[blockIdx_x + (((o/stride2) + 1)*topwidth + neighborhood_grid_radius*topwidth)] += sum1;
+ dline[blockIdx_x + (((o/stride2) + 2)*topwidth + neighborhood_grid_radius*topwidth)] += sum2;
+ dline[blockIdx_x + (((o/stride2) + 3)*topwidth + neighborhood_grid_radius*topwidth)] += sum3;
+ }
+
+ for (; o < o_max; o += 1*stride2)
+ {
+ half8* bottom0 = (half8*)(line0 + x1*max_channels);
+ half8* bottom1 = (half8*)(line1 + (x1 + o)*max_channels);
+
+ int c = 0;
+
+ half8 sum4 = 0;
+ for (; c <= cur_subchannels/8 - 4; c += 4)
+ {
+ sum4 += bottom0[c + 0] * bottom1[c + 0];
+ sum4 += bottom0[c + 1] * bottom1[c + 1];
+ sum4 += bottom0[c + 2] * bottom1[c + 2];
+ sum4 += bottom0[c + 3] * bottom1[c + 3];
+ }
+ for (; c < cur_subchannels/8; c++)
+ {
+ sum4 += bottom0[c] * bottom1[c];
+ }
+
+ half sum = __builtin_shave_sau_sumx_f16_r(sum4);
+
+ for (c = c*8; c < cur_subchannels; c++)
+ {
+ sum += line0[x1*max_channels + c] * line1[(x1 + o)*max_channels + c];
+ }
+
+ dline[blockIdx_x + (((o + neighborhood_grid_radius*stride2)/stride2)*topwidth)] += sum;
+ }
+ }
+ }
+ }
+ }
+}
+
+
+__kernel void correlate2_half(__global const half* restrict bottom0,
+ __global const half* restrict bottom1,
+ __global half* restrict top,
+ int topwidth,
+ int topheight,
+ int bottomwidth,
+ int bottomheight,
+ int bottomchannels,
+ int max_displacement,
+ int padding,
+ int neighborhood_grid_radius,
+ int neighborhood_grid_width,
+ int kernel_size,
+ int stride1,
+ int stride2)
+{
+ int max_channels = (MAX_OPENCL_BUFF_SIZE/sizeof(half) - topwidth*neighborhood_grid_width) / (3*bottomwidth);
+ if (max_channels > 64) max_channels = 64;
+ int subchannels_count = (bottomchannels + max_channels - 1) / max_channels;
+ int subchannels = (bottomchannels + subchannels_count-1) / subchannels_count;
+ if (subchannels < max_channels) subchannels = max_channels;
+
+ const int sumelems = kernel_size*kernel_size*bottomchannels;
+
+ __private half cmx[MAX_OPENCL_BUFF_SIZE/sizeof(half)];
+
+ __private half* line0 = cmx;
+ __private half* line1 = line0 + bottomwidth*subchannels;
+ __private half* dline = line1 + bottomwidth*subchannels;
+
+ int blockIdx_y = get_global_id(0);
+
+#if defined(USE_MANUAL_DMA)
+ __private half* dmabuf = dline + topwidth*neighborhood_grid_width;
+#endif
+
+ int y1 = blockIdx_y*stride1 + max_displacement;
+
+ for (int j = 0; j < kernel_size; j++)
+ {
+ for (int bottomchannel = 0; bottomchannel < bottomchannels; bottomchannel += subchannels)
+ {
+ // configure channel batching
+ int startchannel = bottomchannel;
+ int endchannel = startchannel + subchannels > bottomchannels ? bottomchannels : startchannel + subchannels;
+ int deltachannels = endchannel-startchannel;
+
+ // load line form blob 0 with repackaging
+ if (y1+j-padding >= 0 && y1+j-padding < bottomheight)
+ {
+#if defined(USE_MANUAL_DMA)
+ __global const half* curr = bottom0 + startchannel*bottomheight*bottomwidth + (y1+j-padding)*bottomwidth;
+ dmacpyLineSrcStrideStart(curr,
+ dmabuf,
+ bottomwidth*deltachannels*sizeof(half),
+ bottomwidth*sizeof(half),
+ bottomwidth*bottomheight*sizeof(half));
+
+ for (int ch = 0; ch < deltachannels; ch++)
+ {
+ for (int blockIdx_x = 0; blockIdx_x < bottomwidth/8; blockIdx_x++)
+ {
+ half8 val = ((half8*)(dmabuf + ch*bottomwidth))[blockIdx_x];
+ line0[(blockIdx_x*8 + 0)*max_channels+ch] = val[0];
+ line0[(blockIdx_x*8 + 1)*max_channels+ch] = val[1];
+ line0[(blockIdx_x*8 + 2)*max_channels+ch] = val[2];
+ line0[(blockIdx_x*8 + 3)*max_channels+ch] = val[3];
+
+ line0[(blockIdx_x*8 + 4)*max_channels+ch] = val[4];
+ line0[(blockIdx_x*8 + 5)*max_channels+ch] = val[5];
+ line0[(blockIdx_x*8 + 6)*max_channels+ch] = val[6];
+ line0[(blockIdx_x*8 + 7)*max_channels+ch] = val[7];
+ }
+
+ for (int blockIdx_x = bottomwidth/8*8; blockIdx_x < bottomwidth; blockIdx_x++)
+ {
+ line0[(blockIdx_x)*max_channels+ch] = dmabuf[blockIdx_x + ch*bottomwidth];
+ }
+ }
+
+ if (deltachannels < subchannels)
+ for (int blockIdx_x = 0; blockIdx_x < bottomwidth; blockIdx_x++)
+ memzero(line0 + blockIdx_x*max_channels+deltachannels, (subchannels-deltachannels)*sizeof(half));
+#else
+ for (int blockIdx_x = 0; blockIdx_x < bottomwidth; blockIdx_x++)
+ {
+ for (int ch = 0; ch < deltachannels; ch++)
+ line0[blockIdx_x*max_channels+ch]
+ = bottom0[(ch+startchannel)*bottomheight*bottomwidth + (y1+j-padding)*bottomwidth + blockIdx_x];
+
+ if (deltachannels < subchannels)
+ memzero(line0 + blockIdx_x*max_channels+deltachannels, (subchannels-deltachannels)*sizeof(half));
+ }
+#endif
+ }
+ else
+ memzero(line0, max_channels*bottomwidth*sizeof(half));
+
+ for (int top_channel_y = 0; top_channel_y < neighborhood_grid_width; top_channel_y++)
+ {
+ int y2 = y1 + (top_channel_y - neighborhood_grid_radius) * stride2;
+
+ // load line form blob 1 with repackaging according to the line we work on now
+ if (y2+j-padding >= 0 && y2+j-padding < bottomheight)
+ {
+#if defined(USE_MANUAL_DMA)
+ __global const half* curr = bottom1 + startchannel*bottomheight*bottomwidth + (y2+j-padding)*bottomwidth;
+ dmacpyLineSrcStrideStart(curr,
+ dmabuf,
+ bottomwidth*deltachannels*sizeof(half),
+ bottomwidth*sizeof(half),
+ bottomwidth*bottomheight*sizeof(half));
+
+ for (int ch = 0; ch < deltachannels; ch++)
+ {
+ for (int blockIdx_x = 0; blockIdx_x < bottomwidth/8; blockIdx_x++)
+ {
+ half8 val = ((half8*)(dmabuf + ch*bottomwidth))[blockIdx_x];
+ line1[(blockIdx_x*8 + 0)*max_channels+ch] = val[0];
+ line1[(blockIdx_x*8 + 1)*max_channels+ch] = val[1];
+ line1[(blockIdx_x*8 + 2)*max_channels+ch] = val[2];
+ line1[(blockIdx_x*8 + 3)*max_channels+ch] = val[3];
+
+ line1[(blockIdx_x*8 + 4)*max_channels+ch] = val[4];
+ line1[(blockIdx_x*8 + 5)*max_channels+ch] = val[5];
+ line1[(blockIdx_x*8 + 6)*max_channels+ch] = val[6];
+ line1[(blockIdx_x*8 + 7)*max_channels+ch] = val[7];
+ }
+
+ for (int blockIdx_x = bottomwidth/8*8; blockIdx_x < bottomwidth; blockIdx_x++)
+ {
+ line1[(blockIdx_x)*max_channels+ch] = dmabuf[blockIdx_x + ch*bottomwidth];
+ }
+ }
+#else
+ for (int ch = 0; ch < deltachannels; ch++)
+ {
+ for (int blockIdx_x = 0; blockIdx_x < bottomwidth/8; blockIdx_x++)
+ {
+ half8 val = ((__global half8*)(bottom1 + (ch+startchannel)*bottomheight*bottomwidth + (y2+j-padding)*bottomwidth))[blockIdx_x];
+ line1[(blockIdx_x*8 + 0)*max_channels+ch] = val[0];
+ line1[(blockIdx_x*8 + 1)*max_channels+ch] = val[1];
+ line1[(blockIdx_x*8 + 2)*max_channels+ch] = val[2];
+ line1[(blockIdx_x*8 + 3)*max_channels+ch] = val[3];
+
+ line1[(blockIdx_x*8 + 4)*max_channels+ch] = val[4];
+ line1[(blockIdx_x*8 + 5)*max_channels+ch] = val[5];
+ line1[(blockIdx_x*8 + 6)*max_channels+ch] = val[6];
+ line1[(blockIdx_x*8 + 7)*max_channels+ch] = val[7];
+ }
+ for (int blockIdx_x = bottomwidth/8*8; blockIdx_x < bottomwidth; blockIdx_x++)
+ {
+ half val = (bottom1 + (ch+startchannel)*bottomheight*bottomwidth + (y2+j-padding)*bottomwidth)[blockIdx_x];
+ line1[(blockIdx_x)*max_channels+ch] = val;
+ }
+ }
+#endif
+ for (int blockIdx_x = 0; blockIdx_x < bottomwidth; blockIdx_x++)
+ {
+ if (deltachannels < subchannels)
+ memzero(line1 + blockIdx_x*max_channels+deltachannels, (subchannels-deltachannels)*sizeof(half));
+ }
+ }
+ else
+ memzero(line1, max_channels*bottomwidth*sizeof(half));
+
+ if(j == 0 && startchannel == 0)
+ {
+ memzero(dline, neighborhood_grid_width*topwidth*sizeof(half));
+ }
+ else
+ {
+#if defined(USE_MANUAL_DMA)
+ dmacpyLineSrcStrideStart(top + top_channel_y*neighborhood_grid_width*topheight*topwidth + blockIdx_y*topwidth,
+ dline,
+ topwidth*neighborhood_grid_width*sizeof(half),
+ topwidth*sizeof(half),
+ topwidth*topheight*sizeof(half));
+#else
+ for (int top_channel_x = 0; top_channel_x < neighborhood_grid_width; top_channel_x++)
+ {
+ for (int blockIdx_x = 0; blockIdx_x < topwidth/8; blockIdx_x++)
+ {
+ half8 val = ((__global half8*)(top + ((top_channel_y*neighborhood_grid_width+top_channel_x)*topheight*topwidth + blockIdx_y*topwidth)))[blockIdx_x];
+ ((half8*)(dline + top_channel_x*topwidth))[blockIdx_x] = val;
+ }
+ for (int blockIdx_x = (topwidth/8)*8; blockIdx_x < topwidth; blockIdx_x++)
+ {
+ dline[top_channel_x*topwidth+blockIdx_x] =
+ top[(top_channel_y*neighborhood_grid_width+top_channel_x)*topheight*topwidth + blockIdx_y*topwidth+blockIdx_x];
+ }
+ }
+#endif
+ }
+
+ if (y1+j-padding >= 0 && y1+j-padding < bottomheight && y2+j-padding >= 0 && y2+j-padding < bottomheight)
+ {
+ crosscorrh(line0, line1, dline, topwidth, max_displacement, neighborhood_grid_radius,
+ kernel_size, padding, bottomwidth, stride1, stride2, max_channels, subchannels);
+ }
+
+ if (j == kernel_size-1 && endchannel == bottomchannels)
+ {
+ half8 scale = (half8){(half)sumelems, (half)sumelems, (half)sumelems, (half)sumelems, (half)sumelems, (half)sumelems, (half)sumelems, (half)sumelems};
+ for (int top_channel_x = 0; top_channel_x < neighborhood_grid_width; top_channel_x++)
+ {
+ for (int blockIdx_x = 0; blockIdx_x < topwidth/8; blockIdx_x++)
+ {
+ ((half8*)(dline + top_channel_x*topwidth))[blockIdx_x] =
+ ((half8*)(dline + top_channel_x*topwidth))[blockIdx_x] / scale;
+ }
+ for (int blockIdx_x = (topwidth/8)*8; blockIdx_x < topwidth; blockIdx_x++)
+ {
+ dline[top_channel_x*topwidth+blockIdx_x] = dline[top_channel_x*topwidth+blockIdx_x]/(half)sumelems;
+ }
+ }
+ }
+
+#if defined(USE_MANUAL_DMA)
+ dmacpyLineDstStrideStart(dline,
+ top + top_channel_y*neighborhood_grid_width*topheight*topwidth + blockIdx_y*topwidth,
+ topwidth*neighborhood_grid_width*sizeof(half),
+ topwidth*sizeof(half),
+ topwidth*topheight*sizeof(half));
+#else
+ for (int top_channel_x = 0; top_channel_x < neighborhood_grid_width; top_channel_x++)
+ {
+ for (int blockIdx_x = 0; blockIdx_x < topwidth/8; blockIdx_x++)
+ {
+ ((__global half8*)(top + ((top_channel_y*neighborhood_grid_width+top_channel_x)*topheight*topwidth + blockIdx_y*topwidth)))[blockIdx_x] =
+ ((half8*)(dline + top_channel_x*topwidth))[blockIdx_x] + (half8) {0, 0, 0, 0, 0, 0, 0, 0};
+ }
+ for (int blockIdx_x = (topwidth/8)*8; blockIdx_x < topwidth; blockIdx_x++)
+ {
+ top[(top_channel_y*neighborhood_grid_width+top_channel_x)*topheight*topwidth + blockIdx_y*topwidth+blockIdx_x]
+ = dline[top_channel_x*topwidth+blockIdx_x] + (half)0;
+ }
+ }
+#endif
+ }
+ }
+ }
+}
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-__kernel void ctc_ocl(__global half* probabilities,
- __global half* output_sequences,
- int C_)
-{
- size_t t = get_global_id(0);
-
- __global half* probs = probabilities + t * C_;
-
- int max_class_idx = 0;
- half max_prob = probs[0];
- ++probs;
- for (int c = 1 ; c < C_ ; c++, ++probs)
- {
- if (*probs > max_prob)
- {
- max_prob = *probs;
- max_class_idx = c;
+__global half *find(__global const half *begin, __global const half *end, half value) {
+ while (begin != end) {
+ if (*begin == value) {
+ return begin;
}
+ ++begin;
}
- output_sequences[t] = (half)max_class_idx;
+ return end;
+}
+
+#define USE_MANUAL_DMA
+
+#ifdef USE_MANUAL_DMA
+
+__kernel void __dma_preload_CTCDecoder(__global half *probabilities,
+ __global half *sequence_indicators,
+ __global half *output_sequences,
+ int width,
+ int height,
+ int channels,
+ __local half *local_src,
+ __local half *local_dst)
+{
+ WorkGroupDmaCreateStrideTransaction(
+ probabilities, // src
+ local_src, // dst
+ width * sizeof(half), // src_width,
+ width * sizeof(half), // dst_width,
+ width * height * sizeof(half), // src_stride,
+ width * sizeof(half), // dst_stride,
+ width * height * channels * sizeof(half), // size
+ 0);
}
-__kernel void postProcess(__global half* input,
- __global half* output,
- __global half* seq_ind,
- int height,
- int width,
- int classes)
+__kernel void __dma_postwrite_CTCDecoder(__global half *probabilities,
+ __global half *sequence_indicators,
+ __global half *output_sequences,
+ int width,
+ int height,
+ int channels,
+ __local half *local_src,
+ __local half *local_dst)
{
- int wr_index = 0;
- int rd_index = 0;
+ WorkGroupDmaCreateStrideTransaction(
+ local_dst, // src
+ output_sequences, // dst
+ channels * sizeof(half), // src_width,
+ channels * sizeof(half), // dst_width,
+ channels * sizeof(half), // src_stride,
+ channels * sizeof(half), // dst_stride,
+ channels * height * sizeof(half), // size
+ 0);
+}
- half update_data;
- int update_index;
+__kernel void CTCDecoder(__global half *probabilities,
+ __global half *sequence_indicators,
+ __global half *output_sequences,
+ int width,
+ int height,
+ int channels,
+ __local half *local_src,
+ __local half *local_dst)
+{
+ const int T = channels;
+ const int B = height;
+ const int C = width;
- for (int i = 0; i < classes; i++)
+ for (int i = 0; i < B*T; i++)
{
- output[i] = (half)(-1);
+ local_dst[i] = -1.h;
}
- for (int n = 0; n < height; ++n)
+ int output_index = 0;
+
+ for (int b = 0; b < B; ++b)
{
- int prev_class_id = -1;
- for (int t = 0; t < classes; ++t)
+ __global const half *seq_ind = sequence_indicators + b*T;
+ const int seq_len = find(seq_ind + 1, seq_ind + T, 0.h) - seq_ind;
+ const int time = min(seq_len, T);
+
+ int prev_class_idx = -1;
+
+ for (int t = 0; t < time; ++t)
{
- int class_id = (int)input[rd_index++];
- update_index = wr_index;
- update_data = output[update_index];
+ __local const half *probs = local_src + b*C + t*C*B;
+ int max_class_idx = 0;
+ half max_prob = probs[0];
- if ((class_id < (width - 1)) && !(1 && class_id == prev_class_id))
+ for (int c = 1; c < C; ++c)
{
- update_data = (half)class_id;
- wr_index++;
-
+ const half prob = probs[c];
+ if (prob > max_prob)
+ {
+ max_class_idx = c;
+ max_prob = prob;
+ }
}
- output[update_index] = update_data;
- prev_class_id = class_id;
- if (seq_ind[t + 1] == 0 ) {
- break;
+ if (max_class_idx < C-1 && max_class_idx != prev_class_idx)
+ {
+ local_dst[b*T + output_index] = (half)max_class_idx;
+ output_index++;
}
+
+ prev_class_idx = max_class_idx;
}
}
}
-__kernel void ctc_ref_fp16(__global half* probabilities, __global half* seq_ind, __global half* output_sequences, int C, int H, int W)
+#else
+
+__kernel void CTCDecoder(__global half *probabilities,
+ __global half *sequence_indicators,
+ __global half *output_sequences,
+ int width,
+ int height,
+ int channels,
+ __local half *local_src,
+ __local half *local_dst)
{
- int T_ = C;
- int N_ = H;
- int C_ = W;
+ const int T = channels;
+ const int B = height;
+ const int C = width;
- // Fill output_sequences with -1
- for (int i = 0; i < T_; i++)
+ for (int i = 0; i < B*T; i++)
{
- output_sequences[i] = (half)(-1.0);
+ output_sequences[i] = -1.h;
}
+
int output_index = 0;
- // Caffe impl
- for(int n = 0; n < N_; ++n)
+ for (int b = 0; b < B; ++b)
{
+ __global const half *seq_ind = sequence_indicators + b*T;
+ const int seq_len = find(seq_ind + 1, seq_ind + T, 0.h) - seq_ind;
+ const int time = min(seq_len, T);
+
int prev_class_idx = -1;
- for (int t = 0; t < T_; ++t)
+ for (int t = 0; t < time; ++t)
{
- // get maximum probability and its index
+ __global const half *probs = probabilities + b*C + t*C*B;
int max_class_idx = 0;
- __global half* probs;
- half max_prob;
-
- probs = probabilities + t*C_;
- max_prob = probs[0];
- ++probs;
+ half max_prob = probs[0];
- for (int c = 1; c < C_; ++c, ++probs)
+ for (int c = 1; c < C; ++c)
{
- if (*probs > max_prob)
+ const half prob = probs[c];
+ if (prob > max_prob)
{
max_class_idx = c;
- max_prob = *probs;
+ max_prob = prob;
}
}
- //if (max_class_idx != blank_index_
- // && !(merge_repeated_&& max_class_idx == prev_class_idx))
- if (max_class_idx < C_-1 && !(1 && max_class_idx == prev_class_idx))
+ if (max_class_idx < C-1 && max_class_idx != prev_class_idx)
{
- output_sequences[output_index] = (half)max_class_idx;
+ output_sequences[b*T + output_index] = (half)max_class_idx;
output_index++;
}
prev_class_idx = max_class_idx;
-
- // Assume sequence_indicators is always 1
- if (seq_ind[t + 1] == 0)
- {
- break;
- }
}
}
}
+
+#endif
-<!-- Should be enabled if support to detect required kernel based on tensor format is added -->
-<!-- <CustomLayer name="ReorgYolo" type="MVCL" version="1">
- <Kernel entry="reorg">
+<CustomLayer name="ReorgYolo" type="MVCL" version="1">
+ <Kernel entry="reorg_hwc_naive">
<Source filename="reorg_hwc.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BYXF"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BYXF"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="C" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="stride" type="int" source="stride"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="0"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="0"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="F,1,1" local="stride*stride,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BYXF"/>
- <Tensor arg-name="out" type="output" port-index="0" format="BYXF"/>
- <Scalar arg-name="w" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="h" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="stride" type="int" source="stride" />
- </Parameters>
- <WorkSizes dim="input,0" global="((Y+7)/8)*8,F,1" local="8,1,1"/>
-</CustomLayer> -->
+</CustomLayer>
<CustomLayer name="ReorgYolo" type="MVCL" version="1">
- <Kernel entry="reorg_NCHW">
+ <Where stride="2"/>
+ <Kernel entry="reorg_chw">
<Source filename="reorg_chw.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="C" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="stride" type="int" source="stride"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*2*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*2*2"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="Y*F/(stride*stride),stride*stride,1" local="1,stride,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="out" type="output" port-index="0" format="BFYX"/>
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="stride" type="int" source="stride" />
- </Parameters>
- <WorkSizes dim="input,0" global="((Y+7)/8)*8,F,1" local="8,1,1"/>
</CustomLayer>
-<!-- The latest version of the code is not backward compatible with Yolo-v2, it is to be checked by benchmark-->
-<!-- Add work group config understand parameter from IR -->
-<!-- <WorkSizes global="Y*X,num,1" local="X,num,1" dim="input,0"/> -->
-<CustomLayer name="RegionYolo" type="MVCL" version="1" max-shaves="3">
- <Where do_softmax="1" num="5"/>
- <Kernel entry="region_ocl">
- <Source filename="region_chw.bin"/>
+<CustomLayer name="RegionYolo" type="MVCL" version="1">
+ <Where do_softmax="1"/>
+ <Kernel entry="region_chw">
+ <Source filename="region.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="classes" type="int" source="classes"/>
+ <Scalar arg-name="coords" type="int" source="coords"/>
+ <Scalar arg-name="num" type="int" source="num"/>
+ <Scalar arg-name="maskSize" type="int" source="3"/>
+ <Scalar arg-name="doSoftmax" type="int" source="do_softmax"/>
+ </Parameters>
+ <WorkSizes global="((X+7)/8)*8*Y,num,1" local="((X+7)/8)*8,1,1" dim="input,0"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst_data" type="output" port-index="0" format="ANY"/>
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="classes" type="int" source="classes" />
- <Scalar arg-name="coords" type="int" source="coords" />
- <Scalar arg-name="num" type="int" source="num" />
- <Scalar arg-name="maskSize" type="int" source="3" />
- <Scalar arg-name="doSoftmax" type="int" source="1" />
- </Parameters>
- <WorkSizes global="Y*X,5,1" local="X,5,1" dim="input,0"/>
</CustomLayer>
-<CustomLayer name="RegionYolo" type="MVCL" version="1" max-shaves="3">
- <Where mask="0,1,2" do_softmax="0"/>
- <Kernel entry="region_ocl">
- <Source filename="region_chw.bin"/>
+<CustomLayer name="RegionYolo" type="MVCL" version="1">
+ <Where do_softmax="0" mask="0,1,2"/>
+ <Kernel entry="region_chw">
+ <Source filename="region.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="classes" type="int" source="classes"/>
+ <Scalar arg-name="coords" type="int" source="coords"/>
+ <Scalar arg-name="num" type="int" source="num"/>
+ <Scalar arg-name="maskSize" type="int" source="3"/>
+ <Scalar arg-name="doSoftmax" type="int" source="do_softmax"/>
+ </Parameters>
+ <WorkSizes global="((X+7)/8)*8*Y,3,1" local="((X+7)/8)*8,1,1" dim="input,0"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst_data" type="output" port-index="0" format="ANY"/>
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="classes" type="int" source="classes" />
- <Scalar arg-name="coords" type="int" source="coords" />
- <Scalar arg-name="num" type="int" source="num" />
- <Scalar arg-name="maskSize" type="int" source="3" />
- <Scalar arg-name="doSoftmax" type="int" source="0" />
- </Parameters>
- <WorkSizes global="Y*X,3,1" local="X,3,1" dim="input,0"/>
</CustomLayer>
-<CustomLayer name="RegionYolo" type="MVCL" version="1" max-shaves="3">
- <Where mask="3,4,5" do_softmax="0"/>
- <Kernel entry="region_ocl">
- <Source filename="region_chw.bin"/>
+<CustomLayer name="RegionYolo" type="MVCL" version="1">
+ <Where do_softmax="1"/>
+ <Kernel entry="region_hwc">
+ <Source filename="region.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BYXF"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BYXF"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="classes" type="int" source="classes"/>
+ <Scalar arg-name="coords" type="int" source="coords"/>
+ <Scalar arg-name="num" type="int" source="num"/>
+ <Scalar arg-name="maskSize" type="int" source="3"/>
+ <Scalar arg-name="doSoftmax" type="int" source="do_softmax"/>
+ </Parameters>
+ <WorkSizes global="((X+7)/8)*8*Y,num,1" local="((X+7)/8)*8,1,1" dim="input,0"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst_data" type="output" port-index="0" format="ANY"/>
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="classes" type="int" source="classes" />
- <Scalar arg-name="coords" type="int" source="coords" />
- <Scalar arg-name="num" type="int" source="num" />
- <Scalar arg-name="maskSize" type="int" source="3" />
- <Scalar arg-name="doSoftmax" type="int" source="0" />
- </Parameters>
- <WorkSizes global="Y*X,3,1" local="X,3,1" dim="input,0"/>
</CustomLayer>
-<CustomLayer name="RegionYolo" type="MVCL" version="1" max-shaves="3">
- <Where mask="6,7,8" do_softmax="0"/>
- <Kernel entry="region_ocl">
- <Source filename="region_chw.bin"/>
+<CustomLayer name="RegionYolo" type="MVCL" version="1">
+ <Where do_softmax="0" mask="0,1,2"/>
+ <Kernel entry="region_hwc">
+ <Source filename="region.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BYXF"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BYXF"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="classes" type="int" source="classes"/>
+ <Scalar arg-name="coords" type="int" source="coords"/>
+ <Scalar arg-name="num" type="int" source="num"/>
+ <Scalar arg-name="maskSize" type="int" source="3"/>
+ <Scalar arg-name="doSoftmax" type="int" source="do_softmax"/>
+ </Parameters>
+ <WorkSizes global="((X+7)/8)*8*Y,3,1" local="((X+7)/8)*8,1,1" dim="input,0"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst_data" type="output" port-index="0" format="ANY"/>
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="classes" type="int" source="classes" />
- <Scalar arg-name="coords" type="int" source="coords" />
- <Scalar arg-name="num" type="int" source="num" />
- <Scalar arg-name="maskSize" type="int" source="3" />
- <Scalar arg-name="doSoftmax" type="int" source="0" />
- </Parameters>
- <WorkSizes global="Y*X,3,1" local="X,3,1" dim="input,0"/>
</CustomLayer>
<!-- Pixel-wise kernel binding, local work group config is per line in the input tensor -->
<CustomLayer name="GRN" type="MVCL" version="1">
<Kernel entry="grn_NCHW">
<Source filename="grn.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*F*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*F*2"/>
+ <Scalar arg-name="C" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="bias" type="float" source="bias"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="X,Y,1" local="X,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
- <Data arg-name="src" type="local_data" dim="input,0" size="X*F*2" />
- <Data arg-name="dst" type="local_data" dim="input,0" size="X*F*2" />
- <Scalar arg-name="C" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="bias" type="float" source="bias" />
- </Parameters>
- <WorkSizes dim="input,0" global="X,Y,1" local="X,1,1"/>
</CustomLayer>
<!-- Two stage layer binding, first kernel computes mean and variance, the second one normalizes input tensor-->
-<CustomLayer name="MVN" stage="0" type="MVCL" version="1">
- <Kernel entry="reduction_mean">
+<CustomLayer name="MVN" type="MVCL" version="1">
+ <Kernel entry="reduction_mean" stage="0">
<Source filename="mvn.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="mean" type="output_buffer" port-index="0" dim="output,0" size="Y*F*4"/>
+ <Tensor arg-name="variance" type="output_buffer" port-index="1" dim="output,0" size="Y*F*4"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="across_channels" type="int" source="across_channels"/>
+ <Data arg-name="src_line" type="local_data" dim="input,0" size="X*2"/>
+ </Parameters>
+ <WorkSizes dim="output,0" global="1,Y,F" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="mean" type="output_buffer" port-index="0" dim="output,0" size="Y*F*4" />
- <Tensor arg-name="variance" type="output_buffer" port-index="1" dim="output,0" size="Y*F*4" />
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="across_channels" type="int" source="across_channels" />
- <Data arg-name="src_line" type="local_data" dim="input,0" size="X*2" />
- </Parameters>
- <WorkSizes dim="output,0" global="1,Y,F" local="1,1,1"/>
-</CustomLayer>
-<CustomLayer name="MVN" stage="1" type="MVCL" version="1">
- <Kernel entry="mvn_scale">
+ <Kernel entry="mvn_scale" stage="1">
<Source filename="mvn.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Tensor arg-name="mean_part" type="input_buffer" port-index="0" dim="output,0" size="Y*F*4"/>
+ <Tensor arg-name="power_mean" type="input_buffer" port-index="1" dim="output,0" size="Y*F*4"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H1" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="across_channels" type="int" source="across_channels"/>
+ <Scalar arg-name="normalize_variance" type="int" source="normalize_variance"/>
+ <Scalar arg-name="nparts" type="int" port-index="0" source="I.Y"/>
+ <Data arg-name="src_line" type="local_data" dim="input,0" size="X*2"/>
+ <Data arg-name="dst_line" type="local_data" dim="input,0" size="X*2"/>
+ </Parameters>
+ <WorkSizes dim="output,0" global="1,Y,F" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
- <Tensor arg-name="mean_part" type="input_buffer" port-index="0" dim="output,0" size="Y*F*4" />
- <Tensor arg-name="power_mean" type="input_buffer" port-index="1" dim="output,0" size="Y*F*4" />
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H1" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="across_channels" type="int" source="across_channels" />
- <Scalar arg-name="normalize_variance" type="int" source="normalize_variance" />
- <Scalar arg-name="nparts" type="int" port-index="0" source="I.Y" />
- <Data arg-name="src_line" type="local_data" dim="input,0" size="X*2" />
- <Data arg-name="dst_line" type="local_data" dim="input,0" size="X*2" />
- </Parameters>
- <WorkSizes dim="output,0" global="1,Y,F" local="1,1,1"/>
</CustomLayer>
<!-- Single work group kernel for not embarrassingly-parallel use-case -->
<CustomLayer name="CTCGreedyDecoder" type="MVCL" version="1" max-shaves="1">
- <Kernel entry="ctc_ref_fp16">
+ <Kernel entry="CTCDecoder">
<Source filename="ctc.bin"/>
+ <Parameters>
+ <Tensor arg-name="probabilities" type="input" port-index="0" format="FYX"/>
+ <Tensor arg-name="sequence_indicators" type="input" port-index="1" format="BF"/>
+ <Tensor arg-name="output_sequences" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="width" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="height" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="channels" type="int" port-index="0" source="I.F"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="F*Y*X*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="F*Y*2"/>
+ </Parameters>
+ <WorkSizes dim="output,0" global="1,1,1" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="probabilities" type="input" port-index="0" format="FYX" />
- <Tensor arg-name="seq_ind" type="input" port-index="1" format="ANY" />
- <Tensor arg-name="output_sequences" type="output" port-index="0" format="BFYX" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="C" type="int" port-index="0" source="I.F" />
- </Parameters>
- <WorkSizes dim="output,0" global="1,1,1" local="1,1,1"/>
</CustomLayer>
<CustomLayer name="ShuffleChannel" type="MVCL" version="1">
<!-- <Where group="2"/> -->
<Kernel entry="ShuffleChannel">
<Source filename="shuffle_channels.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="C" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="G" type="int" source="group"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="F,1,1" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
- <Scalar arg-name="C" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="G" type="int" source="group" />
- </Parameters>
- <WorkSizes dim="input,0" global="F,1,1" local="1,1,1"/>
</CustomLayer>
<!-- Reference version of generic quantize layer, should be changed to FakeQuantize-->
<!-- <Where levels="2"/> -->
 <CustomLayer name="Quantize" type="MVCL" version="1">
 <Kernel entry="quantize">
<Source filename="quantize.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="input_low" type="input" port-index="1" format="ANY"/>
+ <Tensor arg-name="input_high" type="input" port-index="2" format="ANY"/>
+ <Tensor arg-name="output_low" type="input" port-index="3" format="ANY"/>
+ <Tensor arg-name="output_high" type="input" port-index="4" format="ANY"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="levels" type="int" source="levels"/>
+ <Scalar arg-name="input_low_size" type="int" port-index="1" source="I.F"/>
+ <Scalar arg-name="input_high_size" type="int" port-index="2" source="I.F"/>
+ <Scalar arg-name="output_low_size" type="int" port-index="3" source="I.F"/>
+ <Scalar arg-name="output_high_size" type="int" port-index="4" source="I.F"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="C" type="int" port-index="0" source="I.F"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*F*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*F*2"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="1,Y,1" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="input_low" type="input" port-index="1" format="ANY" />
- <Tensor arg-name="input_high" type="input" port-index="2" format="ANY" />
- <Tensor arg-name="output_low" type="input" port-index="3" format="ANY" />
- <Tensor arg-name="output_high" type="input" port-index="4" format="ANY" />
- <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
- <Scalar arg-name="levels" type="int" source="levels" />
- <Scalar arg-name="input_low_size" type="int" port-index="1" source="I.F" />
- <Scalar arg-name="input_high_size" type="int" port-index="2" source="I.F" />
- <Scalar arg-name="output_low_size" type="int" port-index="3" source="I.F" />
- <Scalar arg-name="output_high_size" type="int" port-index="4" source="I.F" />
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Data arg-name="src_local" type="local_data" dim="input,0" size="X*Y*2" />
- <Data arg-name="dst_local" type="local_data" dim="input,0" size="X*Y*2" />
- </Parameters>
- <WorkSizes dim="input,0" global="F,1,1" local="1,1,1"/>
</CustomLayer>
<!-- Reference version of generic quantize layer, should be changed to FakeQuantize-->
 <CustomLayer name="Quantize" type="MVCL" version="1">
 <Where levels="256"/>
<Kernel entry="quantize">
<Source filename="binary_layers.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="input_low" type="input" port-index="1" format="BFYX"/>
+ <Tensor arg-name="input_high" type="input" port-index="2" format="BFYX"/>
+ <Tensor arg-name="output_low" type="input" port-index="3" format="BFYX"/>
+ <Tensor arg-name="output_high" type="input" port-index="4" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="levels" type="int" source="levels"/>
+ <Scalar arg-name="input_low_size" type="int" source="input_low_size"/>
+ <Scalar arg-name="input_high_size" type="int" source="input_high_size"/>
+ <Scalar arg-name="output_low_size" type="int" source="output_low_size"/>
+ <Scalar arg-name="output_high_size" type="int" source="output_high_size"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Data arg-name="src_local" type="local_data" dim="input,0" size="X*Y*2"/>
+ <Data arg-name="dst_local" type="local_data" dim="input,0" size="X*Y*2"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="1,1,F" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX" />
- <Tensor arg-name="input_low" type="input" port-index="1" format="BFYX" />
- <Tensor arg-name="input_high" type="input" port-index="2" format="BFYX" />
- <Tensor arg-name="output_low" type="input" port-index="3" format="BFYX" />
- <Tensor arg-name="output_high" type="input" port-index="4" format="BFYX" />
- <Tensor arg-name="dst" type="output" port-index="0" format="BFYX" />
- <Scalar arg-name="levels" type="int" port-index="0" source="levels" />
- <Scalar arg-name="input_low_size" type="int" port-index="0" source="input_low_size" />
- <Scalar arg-name="input_high_size" type="int" port-index="0" source="input_high_size" />
- <Scalar arg-name="output_low_size" type="int" port-index="0" source="output_low_size" />
- <Scalar arg-name="output_high_size" type="int" port-index="0" source="output_high_size" />
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Data arg-name="src_local" type="local_data" dim="input,0" size="X*Y*2" />
- <Data arg-name="dst_local" type="local_data" dim="input,0" size="X*Y*2" />
- </Parameters>
- <WorkSizes dim="input,0" global="1,1,F" local="1,1,1"/>
</CustomLayer>
<CustomLayer name="QuantizeTemporaryType" type="MVCL" version="1">
<Where levels="2"/>
<Kernel entry="binarization">
<Source filename="binary_layers.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="input_low_high" type="input" port-index="1" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="switch_out" type="int" source="switch_out"/>
+ <Scalar arg-name="input_low_high_size" type="int" source="input_low_size"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Tensor arg-name="input_high" type="input" port-index="2" format="BFYX"/>
+ <Tensor arg-name="output_low" type="input" port-index="3" format="BFYX"/>
+ <Tensor arg-name="output_high" type="input" port-index="4" format="BFYX"/>
+ <Scalar arg-name="input_high_size" type="int" source="input_high_size"/>
+ <Scalar arg-name="output_low_size" type="int" source="output_low_size"/>
+ <Scalar arg-name="output_high_size" type="int" source="output_high_size"/>
+ <Data arg-name="src_local" type="local_data" dim="input,0" size="X*Y*2"/>
+ <Data arg-name="dst_local" type="local_data" dim="input,0" size="X*Y*2"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="1,1,F" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX" />
- <Tensor arg-name="input_low_high" type="input" port-index="1" format="BFYX" />
- <Tensor arg-name="dst" type="output" port-index="0" format="BFYX" />
- <Scalar arg-name="switch_out" type="int" port-index="0" source="switch_out" />
- <Scalar arg-name="input_low_high_size" type="int" port-index="0" source="input_low_size" />
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Tensor arg-name="input_high" type="input" port-index="2" format="BFYX" />
- <Tensor arg-name="output_low" type="input" port-index="3" format="BFYX" />
- <Tensor arg-name="output_high" type="input" port-index="4" format="BFYX" />
- <Scalar arg-name="input_high_size" type="int" port-index="0" source="input_high_size" />
- <Scalar arg-name="output_low_size" type="int" port-index="0" source="output_low_size" />
- <Scalar arg-name="output_high_size" type="int" port-index="0" source="output_high_size" />
- <Data arg-name="src_local" type="local_data" dim="input,0" size="X*Y*2" />
- <Data arg-name="dst_local" type="local_data" dim="input,0" size="X*Y*2" />
- </Parameters>
- <WorkSizes dim="input,0" global="1,1,F" local="1,1,1"/>
</CustomLayer>
<CustomLayer name="BinaryConvolution" type="MVCL" version="1">
- <Where kernel="3,3" />
+ <Where kernel="3,3"/>
<Kernel entry="binary_convolution">
<Source filename="binary_convolution3x3.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
+ <Data arg-name="weights_data" type="data" source="weights" format="ANY"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="pad_value" type="float" source="pad_value"/>
+ <Scalar arg-name="IW" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="IH" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="IC" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="DW" type="int" port-index="0" source="dilations"/>
+ <Scalar arg-name="DH" type="int" port-index="1" source="dilations"/>
+ <Scalar arg-name="GC" type="int" source="group"/>
+ <Scalar arg-name="KW" type="int" port-index="0" source="kernel"/>
+ <Scalar arg-name="KH" type="int" port-index="1" source="kernel"/>
+ <Scalar arg-name="PW" type="int" port-index="0" source="pads_begin"/>
+ <Scalar arg-name="PH" type="int" port-index="1" source="pads_begin"/>
+ <Scalar arg-name="SW" type="int" port-index="0" source="strides"/>
+ <Scalar arg-name="SH" type="int" port-index="1" source="strides"/>
+ <Scalar arg-name="OW" type="int" port-index="0" source="O.X"/>
+
+ <Data arg-name="src_local" type="local_data" dim="input,0" size="X*F*3*2"/>
+ <Data arg-name="dst_local" type="local_data" dim="output,0" size="X*2"/>
+ </Parameters>
+ <WorkSizes dim="output,0" global="Y,F,1" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Data arg-name="weights_data" type="data" source="weights" format="ANY" />
- <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
- <Scalar arg-name="pad_value" type="float" source="pad_value" />
- <Scalar arg-name="IW" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="IH" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="IC" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="DW" type="int" port-index="0" source="dilations" />
- <Scalar arg-name="DH" type="int" port-index="1" source="dilations" />
- <Scalar arg-name="GC" type="int" source="group" />
- <Scalar arg-name="KW" type="int" port-index="0" source="kernel" />
- <Scalar arg-name="KH" type="int" port-index="1" source="kernel" />
- <Scalar arg-name="PW" type="int" port-index="0" source="pads_begin" />
- <Scalar arg-name="PH" type="int" port-index="1" source="pads_begin" />
- <Scalar arg-name="SW" type="int" port-index="0" source="strides" />
- <Scalar arg-name="SH" type="int" port-index="1" source="strides" />
- <Scalar arg-name="OW" type="int" port-index="0" source="O.X" />
-
- <Data arg-name="src_local" type="local_data" dim="input,0" size="X*F*3*2"/>
- <Data arg-name="dst_local" type="local_data" dim="output,0" size="X*2"/>
- </Parameters>
- <WorkSizes dim="output,0" global="Y,F,1" local="1,1,1"/>
</CustomLayer>
<CustomLayer name="BinaryConvolution" type="MVCL" version="1">
- <Where kernel="1,1" />
+ <Where kernel="1,1"/>
<Kernel entry="binary_convolution">
<Source filename="binary_convolution1x1.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
+ <Data arg-name="weights_data" type="data" source="weights" format="ANY"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="pad_value" type="float" source="pad_value"/>
+ <Scalar arg-name="IW" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="IH" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="IC" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="DW" type="int" port-index="0" source="dilations"/>
+ <Scalar arg-name="DH" type="int" port-index="1" source="dilations"/>
+ <Scalar arg-name="GC" type="int" source="group"/>
+ <Scalar arg-name="KW" type="int" port-index="0" source="kernel"/>
+ <Scalar arg-name="KH" type="int" port-index="1" source="kernel"/>
+ <Scalar arg-name="PW" type="int" port-index="0" source="pads_begin"/>
+ <Scalar arg-name="PH" type="int" port-index="1" source="pads_begin"/>
+ <Scalar arg-name="SW" type="int" port-index="0" source="strides"/>
+ <Scalar arg-name="SH" type="int" port-index="1" source="strides"/>
+ <Scalar arg-name="OW" type="int" port-index="0" source="O.X"/>
+
+ <Data arg-name="src_local" type="local_data" dim="input,0" size="X*F*2"/>
+ <Data arg-name="dst_local" type="local_data" dim="output,0" size="X*2"/>
+ </Parameters>
+ <WorkSizes dim="output,0" global="Y,F,1" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Data arg-name="weights_data" type="data" source="weights" format="ANY" />
- <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
- <Scalar arg-name="pad_value" type="float" source="pad_value" />
- <Scalar arg-name="IW" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="IH" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="IC" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="DW" type="int" port-index="0" source="dilations" />
- <Scalar arg-name="DH" type="int" port-index="1" source="dilations" />
- <Scalar arg-name="GC" type="int" source="group" />
- <Scalar arg-name="KW" type="int" port-index="0" source="kernel" />
- <Scalar arg-name="KH" type="int" port-index="1" source="kernel" />
- <Scalar arg-name="PW" type="int" port-index="0" source="pads_begin" />
- <Scalar arg-name="PH" type="int" port-index="1" source="pads_begin" />
- <Scalar arg-name="SW" type="int" port-index="0" source="strides" />
- <Scalar arg-name="SH" type="int" port-index="1" source="strides" />
- <Scalar arg-name="OW" type="int" port-index="0" source="O.X" />
-
- <Data arg-name="src_local" type="local_data" dim="input,0" size="X*F*2"/>
- <Data arg-name="dst_local" type="local_data" dim="output,0" size="X*2"/>
- </Parameters>
- <WorkSizes dim="output,0" global="Y,F,1" local="1,1,1"/>
</CustomLayer>
<!-- Reference version of generic quantize binary convolution -->
<CustomLayer name="BinaryConvolution" type="MVCL" version="1">
<Kernel entry="binary_convolution">
<Source filename="binary_layers.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
+ <Data arg-name="weights_data" type="data" source="weights" format="ANY"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="pad_value" type="float" source="pad_value"/>
+ <Scalar arg-name="IW" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="IH" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="IC" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="DW" type="int" port-index="0" source="dilations"/>
+ <Scalar arg-name="DH" type="int" port-index="1" source="dilations"/>
+ <Scalar arg-name="GC" type="int" source="group"/>
+ <Scalar arg-name="KW" type="int" port-index="0" source="kernel"/>
+ <Scalar arg-name="KH" type="int" port-index="1" source="kernel"/>
+ <Scalar arg-name="PW" type="int" port-index="0" source="pads_begin"/>
+ <Scalar arg-name="PH" type="int" port-index="1" source="pads_begin"/>
+ <Scalar arg-name="SW" type="int" port-index="0" source="strides"/>
+ <Scalar arg-name="SH" type="int" port-index="1" source="strides"/>
+ </Parameters>
+ <WorkSizes dim="output,0" global="X,Y,F" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Data arg-name="weights_data" type="data" source="weights" format="ANY" />
- <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
- <Scalar arg-name="pad_value" type="float" source="pad_value" />
- <Scalar arg-name="IW" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="IH" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="IC" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="DW" type="int" port-index="0" source="dilations" />
- <Scalar arg-name="DH" type="int" port-index="1" source="dilations" />
- <Scalar arg-name="GC" type="int" source="group" />
- <Scalar arg-name="KW" type="int" port-index="0" source="kernel" />
- <Scalar arg-name="KH" type="int" port-index="1" source="kernel" />
- <Scalar arg-name="PW" type="int" port-index="0" source="pads_begin" />
- <Scalar arg-name="PH" type="int" port-index="1" source="pads_begin" />
- <Scalar arg-name="SW" type="int" port-index="0" source="strides" />
- <Scalar arg-name="SH" type="int" port-index="1" source="strides" />
- </Parameters>
- <WorkSizes dim="output,0" global="X,Y,F" local="1,1,1"/>
</CustomLayer>
<CustomLayer name="Resample" type="MVCL" version="1">
- <Where antialias="0" />
- <Kernel entry="resample_nearest">
- <Source filename="resample_nn.bin" />
- </Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX" />
- <Tensor arg-name="dst" type="output" port-index="0" format="BFYX" />
- <Scalar arg-name="iw" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="ih" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="factor" type="float" port-index="0" source="factor" />
- <Scalar arg-name="ow" type="int" port-index="0" source="O.X" />
- <Scalar arg-name="oh" type="int" port-index="0" source="O.Y" />
- <Scalar arg-name="channels" type="int" port-index="0" source="I.F" />
- </Parameters>
- <WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ <Where antialias="0"/>
+ <Kernel entry="resample_nearest">
+ <Source filename="resample_nn.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*ceil(1/factor)*F*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="output,0" size="X*F*2"/>
+ <Scalar arg-name="iw" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="ih" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="factor" type="float" source="factor"/>
+ <Scalar arg-name="ow" type="int" port-index="0" source="O.X"/>
+ <Scalar arg-name="oh" type="int" port-index="0" source="O.Y"/>
+ <Scalar arg-name="channels" type="int" port-index="0" source="I.F"/>
+ </Parameters>
+ <WorkSizes global="1,Y,1" local="1,1,1" dim="output,0"/>
+ </Kernel>
</CustomLayer>
<CustomLayer name="Resample" type="MVCL" version="1">
- <Where antialias="1"/>
-<Kernel entry="resample_with_antialias">
- <Source filename="resample_with_antialias.bin" />
-</Kernel>
-<Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
- <Scalar arg-name="iw" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="ih" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="factor" type="float" port-index="0" source="factor" />
- <Scalar arg-name="ow" type="int" port-index="0" source="O.X" />
- <Scalar arg-name="oh" type="int" port-index="0" source="O.Y" />
- <Scalar arg-name="channels" type="int" port-index="0" source="I.F" />
-</Parameters>
-<WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ <Where antialias="1"/>
+ <Kernel entry="resample_with_antialias">
+ <Source filename="resample_with_antialias.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*5*F*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="output,0" size="X*F*2"/>
+ <Scalar arg-name="iw" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="ih" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="factor" type="float" source="factor"/>
+ <Scalar arg-name="ow" type="int" port-index="0" source="O.X"/>
+ <Scalar arg-name="oh" type="int" port-index="0" source="O.Y"/>
+ <Scalar arg-name="channels" type="int" port-index="0" source="I.F"/>
+ </Parameters>
+ <WorkSizes global="1,round(Y*factor),F" local="1,1,F" dim="input,0"/>
+ </Kernel>
</CustomLayer>
<CustomLayer name="Convolution" type="MVCL" version="1">
- <Where isHWC="0"/>
-<Kernel entry="Convolution1x1_NCHW">
- <Source filename="convolution1x1.bin" />
-</Kernel>
-<Parameters>
- <Tensor arg-name="in" type="input" port-index="0" format="BFYX" />
- <Tensor arg-name="out" type="output" port-index="0" format="BFYX" />
- <Data arg-name="w" type="data" source="weights" format="ANY"/>
- <Scalar arg-name="IW" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="IH" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="IC" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="OW" type="int" port-index="0" source="O.X" />
- <Scalar arg-name="OH" type="int" port-index="0" source="O.Y" />
- <Scalar arg-name="OC" type="int" port-index="0" source="O.F" />
-
- <Scalar arg-name="stride-x" type="int" port-index="0" source="stride-x"/>
- <Scalar arg-name="stride-y" type="int" port-index="0" source="stride-y"/>
- <Scalar arg-name="pad-x" type="int" port-index="0" source="pad-x" />
- <Scalar arg-name="pad-y" type="int" port-index="0" source="pad-y" />
- <Scalar arg-name="kernel-x" type="int" port-index="0" source="kernel-x"/>
- <Scalar arg-name="kernel-y" type="int" port-index="0" source="kernel-y"/>
- <Scalar arg-name="output" type="int" port-index="0" source="output" />
- <Scalar arg-name="group" type="int" port-index="0" source="group" />
-
- <Data arg-name="in_local" type="local_data" dim="input,0" size="X*F*2"/>
- <Data arg-name="out_local" type="local_data" dim="output,0" size="X*2"/>
-
-</Parameters>
-<WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ <Where kernel="1,1" dilation="1,1"/>
+ <Kernel entry="Convolution1x1_NCHW">
+ <Source filename="convolution1x1.bin"/>
+ <Parameters>
+ <Tensor arg-name="in" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="out" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="w" type="data" source="weights" format="ANY"/>
+ <Scalar arg-name="IW" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="IH" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="IC" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="OW" type="int" port-index="0" source="O.X"/>
+ <Scalar arg-name="OH" type="int" port-index="0" source="O.Y"/>
+ <Scalar arg-name="OC" type="int" port-index="0" source="O.F"/>
+
+ <Scalar arg-name="stride-x" type="int" port-index="0" source="stride"/>
+ <Scalar arg-name="stride-y" type="int" port-index="1" source="stride"/>
+ <Scalar arg-name="pad-x" type="int" port-index="0" source="pads_begin"/>
+ <Scalar arg-name="pad-y" type="int" port-index="1" source="pads_begin"/>
+ <Scalar arg-name="kernel-x" type="int" port-index="0" source="kernel"/>
+ <Scalar arg-name="kernel-y" type="int" port-index="1" source="kernel"/>
+ <Scalar arg-name="output" type="int" port-index="0" source="output"/>
+ <Scalar arg-name="group" type="int" port-index="0" source="group"/>
+
+ <Data arg-name="in_local" type="local_data" dim="input,0" size="X*F*2"/>
+ <Data arg-name="out_local" type="local_data" dim="output,0" size="X*2"/>
+
+ </Parameters>
+ <WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ </Kernel>
</CustomLayer>
<CustomLayer name="Convolution" type="MVCL" version="1">
- <Where isHWC="1"/>
-<Kernel entry="Convolution1x1_NHWC">
- <Source filename="convolution1x1.bin" />
-</Kernel>
-<Parameters>
- <Tensor arg-name="in" type="input" port-index="0" format="BYXF" />
- <Tensor arg-name="out" type="output" port-index="0" format="BFYX" />
- <Data arg-name="w" type="data" source="weights" format="ANY"/>
- <Scalar arg-name="IW" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="IH" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="IC" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="OW" type="int" port-index="0" source="O.X" />
- <Scalar arg-name="OH" type="int" port-index="0" source="O.Y" />
- <Scalar arg-name="OC" type="int" port-index="0" source="O.F" />
-
- <Scalar arg-name="stride-x" type="int" port-index="0" source="stride-x"/>
- <Scalar arg-name="stride-y" type="int" port-index="0" source="stride-y"/>
- <Scalar arg-name="pad-x" type="int" port-index="0" source="pad-x" />
- <Scalar arg-name="pad-y" type="int" port-index="0" source="pad-y" />
- <Scalar arg-name="kernel-x" type="int" port-index="0" source="kernel-x"/>
- <Scalar arg-name="kernel-y" type="int" port-index="0" source="kernel-y"/>
- <Scalar arg-name="output" type="int" port-index="0" source="output" />
- <Scalar arg-name="group" type="int" port-index="0" source="group" />
-
- <Data arg-name="in_local" type="local_data" dim="input,0" size="X*F*2"/>
- <Data arg-name="out_local" type="local_data" dim="output,0" size="X*2"/>
-</Parameters>
-<WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ <Where kernel="1,1" dilation="1,1"/>
+ <Kernel entry="Convolution1x1_NHWC">
+ <Source filename="convolution1x1.bin"/>
+ <Parameters>
+ <Tensor arg-name="in" type="input" port-index="0" format="BYXF"/>
+ <Tensor arg-name="out" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="w" type="data" source="weights" format="ANY"/>
+ <Scalar arg-name="IW" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="IH" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="IC" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="OW" type="int" port-index="0" source="O.X"/>
+ <Scalar arg-name="OH" type="int" port-index="0" source="O.Y"/>
+ <Scalar arg-name="OC" type="int" port-index="0" source="O.F"/>
+
+ <Scalar arg-name="stride-x" type="int" port-index="0" source="stride"/>
+ <Scalar arg-name="stride-y" type="int" port-index="1" source="stride"/>
+ <Scalar arg-name="pad-x" type="int" port-index="0" source="pads_begin"/>
+ <Scalar arg-name="pad-y" type="int" port-index="1" source="pads_begin"/>
+ <Scalar arg-name="kernel-x" type="int" port-index="0" source="kernel"/>
+ <Scalar arg-name="kernel-y" type="int" port-index="1" source="kernel"/>
+ <Scalar arg-name="output" type="int" port-index="0" source="output"/>
+ <Scalar arg-name="group" type="int" port-index="0" source="group"/>
+
+ <Data arg-name="in_local" type="local_data" dim="input,0" size="X*F*2"/>
+ <Data arg-name="out_local" type="local_data" dim="output,0" size="X*2"/>
+ </Parameters>
+ <WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ </Kernel>
</CustomLayer>
<CustomLayer name="Convolution" type="MVCL" version="1">
-<Where kernel-x="3"/>
-<Kernel entry="Convolution3x3">
- <Source filename="convolution3x3.bin" />
-</Kernel>
-<Parameters>
- <Tensor arg-name="in_param" type="input" port-index="0" format="BFYX" />
- <Tensor arg-name="out" type="output" port-index="0" format="BFYX" />
- <Data arg-name="w" type="data" source="weights" format="BFYX" />
- <Scalar arg-name="IW" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="IH" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="IC" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="OW" type="int" port-index="0" source="O.X" />
- <Scalar arg-name="OH" type="int" port-index="0" source="O.Y" />
- <Scalar arg-name="OC" type="int" port-index="0" source="O.F" />
- <Scalar arg-name="KX" type="int" port-index="0" source="kernel-x" />
- <Scalar arg-name="KY" type="int" port-index="0" source="kernel-y" />
- <Scalar arg-name="stride_x" type="int" port-index="0" source="stride-x" />
- <Scalar arg-name="stride_y" type="int" port-index="0" source="stride-y" />
- <Scalar arg-name="pad_x" type="int" port-index="0" source="pad-x" />
- <Scalar arg-name="pad_y" type="int" port-index="0" source="pad-y" />
- <Scalar arg-name="dilation_x" type="int" port-index="0" source="dilation-x"/>
- <Scalar arg-name="dilation_y" type="int" port-index="0" source="dilation-y"/>
-
- <Scalar arg-name="stride-x" type="int" port-index="1" source="stride-x" />
- <Scalar arg-name="stride-y" type="int" port-index="1" source="stride-y" />
- <Scalar arg-name="pad-x" type="int" port-index="1" source="pad-x" />
- <Scalar arg-name="pad-y" type="int" port-index="1" source="pad-y" />
- <Scalar arg-name="kernel-x" type="int" port-index="0" source="kernel-x" />
- <Scalar arg-name="kernel-y" type="int" port-index="0" source="kernel-y" />
- <Scalar arg-name="output" type="int" port-index="0" source="output" />
-
- <Data arg-name="in_local" type="local_data" dim="input,0" size="X*F*3*2"/>
- <Data arg-name="out_local" type="local_data" dim="output,0" size="X*F*2"/>
- <Data arg-name="w_local" type="local_data" dim="input,0" size="3*3*F*2"/>
-</Parameters>
-<WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ <Where kernel="3,3" dilation="1,1"/>
+ <Kernel entry="Convolution3x3">
+ <Source filename="convolution3x3.bin"/>
+ <Parameters>
+ <Tensor arg-name="in_param" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="out" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="w" type="data" source="weights" format="BFYX"/>
+ <Scalar arg-name="IW" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="IH" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="IC" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="OW" type="int" port-index="0" source="O.X"/>
+ <Scalar arg-name="OH" type="int" port-index="0" source="O.Y"/>
+ <Scalar arg-name="OC" type="int" port-index="0" source="O.F"/>
+ <Scalar arg-name="KX" type="int" port-index="0" source="kernel"/>
+ <Scalar arg-name="KY" type="int" port-index="1" source="kernel"/>
+ <Scalar arg-name="stride_x" type="int" port-index="0" source="stride"/>
+ <Scalar arg-name="stride_y" type="int" port-index="1" source="stride"/>
+ <Scalar arg-name="pad_x" type="int" port-index="0" source="pads_begin"/>
+ <Scalar arg-name="pad_y" type="int" port-index="1" source="pads_begin"/>
+ <Scalar arg-name="dilation_x" type="int" port-index="0" source="dilation"/>
+ <Scalar arg-name="dilation_y" type="int" port-index="1" source="dilation"/>
+ <Scalar arg-name="output" type="int" port-index="0" source="output"/>
+
+ <Data arg-name="in_local" type="local_data" dim="input,0" size="X*F*3*2"/>
+ <Data arg-name="out_local" type="local_data" dim="output,0" size="X*F*2"/>
+ <Data arg-name="w_local" type="local_data" dim="input,0" size="3*3*F*2"/>
+ </Parameters>
+ <WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ </Kernel>
</CustomLayer>
<CustomLayer name="ExperimentalDetectronPriorGridGenerator" type="MVCL" version="1">
-<Kernel entry="experimental_detectron_prior_grid_generator">
- <Source filename="detectron_prior_grid_gen.bin"/>
-</Kernel>
-<Parameters>
- <Tensor arg-name="input_priors" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="input_feature_map" type="input" port-index="1" format="BFYX"/>
- <Tensor arg-name="input_rois" type="input" port-index="2" format="BFYX"/>
- <Tensor arg-name="output" type="output" port-index="0" format="BFYX"/>
- <Data arg-name="local_input_priors" type="local_data" dim="input,1" size="X*2" />
- <Data arg-name="local_output" type="local_data" dim="input,1" size="((X+7)/8)*12*2"/>
- <Scalar arg-name="grid_h" type="int" port-index="1" source="I.Y"/>
- <Scalar arg-name="grid_w" type="int" port-index="1" source="I.X"/>
- <Scalar arg-name="stride_h" type="float" port-index="0" source="stride_h"/>
- <Scalar arg-name="stride_w" type="float" port-index="0" source="stride_w"/>
- <Scalar arg-name="num_priors" type="int" port-index="0" source="I.Y"/>
- <Scalar arg-name="num_anchors_per_prior" type="int" port-index="0" source="I.X"/>
-</Parameters>
-<WorkSizes dim="input,1" global="((X+31)/32)*32,Y,1" local="32,1,1"/>
+ <Kernel entry="experimental_detectron_prior_grid_generator">
+ <Source filename="detectron_prior_grid_gen.bin"/>
+ <Parameters>
+ <Tensor arg-name="input_priors" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="input_feature_map" type="input" port-index="1" format="BFYX"/>
+ <Tensor arg-name="input_rois" type="input" port-index="2" format="BFYX"/>
+ <Tensor arg-name="output" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="local_input_priors" type="local_data" dim="input,1" size="X*2"/>
+ <Data arg-name="local_output" type="local_data" dim="input,1" size="((X+7)/8)*12*2"/>
+ <Scalar arg-name="grid_h" type="int" port-index="1" source="I.Y"/>
+ <Scalar arg-name="grid_w" type="int" port-index="1" source="I.X"/>
+ <Scalar arg-name="stride_h" type="float" source="stride_h"/>
+ <Scalar arg-name="stride_w" type="float" source="stride_w"/>
+ <Scalar arg-name="num_priors" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="num_anchors_per_prior" type="int" port-index="0" source="I.X"/>
+ </Parameters>
+ <WorkSizes dim="input,1" global="((X+31)/32)*32,Y,1" local="32,1,1"/>
+ </Kernel>
+</CustomLayer>
+
+<CustomLayer name="Convert" type="MVCL" version="1">
+ <Kernel entry="cvtu8f16">
+ <Source filename="cvtu8f16.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="scale" type="float" source="scale"/>
+ <Scalar arg-name="bias" type="float" source="bias"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*1"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*2"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="X,Y,F" local="X,1,1"/>
+ </Kernel>
+</CustomLayer>
+
+<CustomLayer name="Correlate" type="MVCL" version="1">
+ <Kernel entry="correlate2_half">
+ <Source filename="correlate.bin"/>
+ <Parameters>
+ <Tensor arg-name="bottom0" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="bottom1" type="input" port-index="1" format="BFYX"/>
+ <Tensor arg-name="top" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="topwidth" type="int" source="top_width"/>
+ <Scalar arg-name="topheight" type="int" source="top_height"/>
+ <Scalar arg-name="bottomwidth" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="bottomheight" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="bottomchannels" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="max_displacement" type="int" source="displacement"/>
+ <Scalar arg-name="padding" type="int" source="pad"/>
+ <Scalar arg-name="neighborhood_grid_radius" type="int" source="neighborhood_grid_radius"/>
+ <Scalar arg-name="neighborhood_grid_width" type="int" source="neighborhood_grid_width"/>
+ <Scalar arg-name="kernel_size" type="int" source="kernel_size"/>
+ <Scalar arg-name="stride1" type="int" port-index="0" source="stride"/>
+ <Scalar arg-name="stride2" type="int" port-index="1" source="stride"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="top_height,1,1" local="1,1,1"/>
+ </Kernel>
+</CustomLayer>
+
+<CustomLayer name="SpatialTransform" type="MVCL" version="1">
+ <Kernel entry="ocl_st">
+ <Source filename="st.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="theta" type="input" port-index="1" format="ANY"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="C" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*F*2"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="(X+511)/512,Y,1" local="1,1,1"/>
+ </Kernel>
</CustomLayer>
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-__kernel void cvtu8f16(const __global uchar* restrict src,
- __global half* restrict dst,
- float scale,
- float bais)
+#define USE_MANUAL_DMA 1
+
+#if defined (USE_MANUAL_DMA)
+
+__kernel void __dma_preload_cvtu8f16(
+ __global uchar* restrict src,
+ __global half* restrict dst,
+ float scale,
+ float bias,
+ __local uchar* restrict local_src,
+ __local half* restrict local_dst)
{
- int idx = get_global_id(0)
- + get_global_id(1) * get_global_size(0)
- + get_global_id(2) * get_global_size(0) * get_global_size(1);
+ WorkGroupDmaCreate3DTransaction(
+ src + get_group_id(0)*get_local_size(0)
+ + get_group_id(1)*get_local_size(1)*get_global_size(0)
+ + get_group_id(2)*get_local_size(2)*get_global_size(0)*get_global_size(1), // src
+ local_src, // dst
+ get_local_size(0) * sizeof(uchar), // src width
+ get_local_size(0) * sizeof(uchar), // dst width
+ get_global_size(0) * sizeof(uchar), // src stride
+ get_local_size(0) * sizeof(uchar), // dst stride
+ get_local_size(2), // num planes
+ get_global_size(0) * get_global_size(1) * sizeof(uchar), // src plane stride
+ get_local_size(0) * get_local_size(1) * sizeof(uchar), // dst plane stride
+ get_local_size(0) * get_local_size(1) * sizeof(uchar), // plane size
+ 0);
+}
- dst[idx] = convert_half(src[idx])*(half)scale+(half)bais;
+__kernel void __dma_postwrite_cvtu8f16(
+ __global uchar* restrict src,
+ __global half* restrict dst,
+ float scale,
+ float bias,
+ __local uchar* restrict local_src,
+ __local half* restrict local_dst)
+{
+ WorkGroupDmaCreate3DTransaction(
+ local_dst, // src
+ dst + get_group_id(0)*get_local_size(0)
+ + get_group_id(1)*get_local_size(1)*get_global_size(0)
+ + get_group_id(2)*get_local_size(2)*get_global_size(0)*get_global_size(1), // dst
+ get_local_size(0) * sizeof(half), // src width
+ get_local_size(0) * sizeof(half), // dst width
+ get_local_size(0) * sizeof(half), // src stride
+ get_global_size(0) * sizeof(half), // dst stride
+ get_local_size(2), // num planes
+ get_local_size(0) * get_local_size(1) * sizeof(half), // src plane stride
+ get_global_size(0) * get_global_size(1) * sizeof(half), // dst plane stride
+ get_local_size(0) * get_local_size(1) * sizeof(half), // plane size
+ 0);
}
+
+__kernel void cvtu8f16(
+ __global uchar* restrict src,
+ __global half* restrict dst,
+ float scale,
+ float bias,
+ __local uchar* restrict local_src,
+ __local half* restrict local_dst)
+{
+ size_t idx = get_local_id(0) +
+ get_local_id(1)*get_local_size(0) +
+ get_local_id(2)*get_local_size(0)*get_local_size(1);
+ local_dst[idx] = convert_half(local_src[idx])*(half)scale+(half)bias;
+}
+
+#else // defined (USE_MANUAL_DMA)
+
+__kernel void cvtu8f16(
+ __global uchar* restrict src,
+ __global half* restrict dst,
+ float scale,
+ float bias,
+ __local uchar* restrict local_src, // unused, added for compatibility with DMA variant
+ __local half* restrict local_dst) // unused, added for compatibility with DMA variant
+{
+ int idx = get_global_id(0) +
+ get_global_id(1) * get_global_size(0) +
+ get_global_id(2) * get_global_size(0) * get_global_size(1);
+ dst[idx] = convert_half(src[idx])*(half)scale+(half)bias;
+}
+
+#endif // defined (USE_MANUAL_DMA)
+
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-// Define if runtime supports it. MX runtime is compatible, KMB is in WIP state
#define USE_MANUAL_DMA 1
#if defined (USE_MANUAL_DMA)
}
__kernel void grn_NCHW(
- __global const half* restrict src_data,
- __global half* restrict dst_data,
- __local half* restrict src,
- __local half* restrict dst,
+ __global const half* restrict src,
+ __global half* restrict dst,
+ __local half* restrict local_src,
+ __local half* restrict local_dst,
int C,
float bias)
{
#pragma unroll 8
for (int c = 0; c < C; c++)
{
- float val = (float) src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)];
+ float val = (float) local_src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)];
variance += val*val;
}
#pragma unroll 8
for (int c = 0; c < C; c++)
{
- dst[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)]
- = src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)] * hvariance;
+ local_dst[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)]
+ = local_src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)] * hvariance;
}
}
#else // defined (USE_MANUAL_DMA)
__kernel void grn_NCHW(
- __global const half* restrict src_data,
- __global half* restrict dst_data,
- __local half* restrict src, // unused, added for compatibility with DMA variant
- __local half* restrict dst, // unused, added for compatibility with DMA variant
+ __global const half* restrict src,
+ __global half* restrict dst,
+ __local half* restrict local_src, // unused, added for compatibility with DMA variant
+ __local half* restrict local_dst, // unused, added for compatibility with DMA variant
int C,
float bias)
{
#pragma unroll 4
for (int c = 0; c < C; c++)
{
- float val = (float) src_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)];
+ float val = (float) src[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)];
variance += val*val;
}
#pragma unroll 4
for (int c = 0; c < C; c++)
{
- dst_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)]
- = src_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)] * hvariance;
+ dst[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)]
+ = src[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)] * hvariance;
}
}
#endif // defined (USE_MANUAL_DMA)
-
-// doesn't work right now due to compiler limitation
-// ToDo: fix compiler
-#if defined(IN_KERNEL_DMA)
-
-#define MAX_LOCAL_W 224
-#define MAX_LOCAL_H 2
-#define MAX_LOCAL_C 24
-
-__kernel void grn_NCHW(__global const half* restrict src_data,
- __global half* restrict dst_data,
- int C,
- float bias)
-{
- __local half src[MAX_LOCAL_W*MAX_LOCAL_H*MAX_LOCAL_C]; // get_local_size(0)*get_local_size(1)*C
- __local half dst[MAX_LOCAL_W*MAX_LOCAL_H*MAX_LOCAL_C]; // get_local_size(0)*get_local_size(1)*C
-
- const size_t index = get_group_id(0)*get_local_size(0) + get_group_id(1)*get_local_size(1)*get_global_size(0);
-
- event_t e1 = async_work_group_copy_3D3D(
- src, // dst
- src_data + index, // src
- get_local_size(0), // num_elements_per_line,
- get_local_size(1), // num_lines,
- get_global_size(0) - get_local_size(0), // src_line_stride,
- 0, // dst_line_stride,
- C, // num_planes,
- get_global_size(0)*get_global_size(1) - get_local_size(0) * get_local_size(1), // src_plane_stride
- 0, // dst_plane_stride
- 0); // event
- wait_group_events(1, &e1);
-
- ////////////////////////
-
- float variance = bias + 1e-9f;
-
- #pragma unroll 8
- for (int c = 0; c < C; c++)
- {
- float val = (float) src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)];
- variance += val*val;
- }
-
- half hvariance = (half)(native_rsqrt((half)(variance/16.f))*0.25f);
-
- #pragma unroll 8
- for (int c = 0; c < C; c++)
- {
- dst[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)]
- = src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)] * hvariance;
- }
-
- ////////////////////
-
- event_t e2 = async_work_group_copy_3D3D(
- dst_data + index, // src
- dst, // dst
- get_local_size(0), // num_elements_per_line,
- get_local_size(1), // num_lines,
- 0, // src_line_stride,
- get_global_size(0) - get_local_size(0), // dst_line_stride,
- C, // num_planes,
- 0, // src_plane_stride
- get_global_size(0)*get_global_size(1) - get_local_size(0) * get_local_size(1), // dst_plane_stride
- 0); // event
- wait_group_events(1, &e2);
-}
-#endif
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Define if runtime supports it. MX runtime is compatible, KMB is in WIP state
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-__kernel void __dma_preload_quantize(const __global half* __restrict src,
- const __global half* __restrict input_low,
- const __global half* __restrict input_high,
- const __global half* __restrict output_low,
- const __global half* __restrict output_high,
- const __global half* __restrict dst,
- int levels,
- int input_low_size,
- int input_high_size,
- int output_low_size,
- int output_high_size,
- int W,
- int H,
- __local half* __restrict src_local,
- const __local half* __restrict dst_local)
+__kernel void __dma_preload_quantize(__global half const *const restrict src,
+ __global half const *const restrict input_low,
+ __global half const *const restrict input_high,
+ __global half const *const restrict output_low,
+ __global half const *const restrict output_high,
+ __global half *const restrict dst,
+ int levels,
+ int input_low_size,
+ int input_high_size,
+ int output_low_size,
+ int output_high_size,
+ int W,
+ int C,
+ __local half *const restrict local_src,
+ __local half const *const restrict local_dst)
{
- const int sizePlane = W*H;
- async_work_group_copy(src_local ,src + get_group_id(0)*sizePlane, sizePlane, 0);
+ WorkGroupDmaCreateStrideTransaction(
+ src + get_group_id(1) * get_local_size(1) * W, // src
+ local_src, // dst
+ W * sizeof(half), // src_width,
+ W * sizeof(half), // dst_width,
+ get_global_size(1) * W * sizeof(half), // src_stride,
+ W * sizeof(half), // dst_stride,
+ W * C * sizeof(half), // size
+ 0);
}
-__kernel void __dma_postwrite_quantize( const __global half* __restrict src,
- const __global half* __restrict input_low,
- const __global half* __restrict input_high,
- const __global half* __restrict output_low,
- const __global half* __restrict output_high,
- __global half* __restrict dst,
- int levels,
- int input_low_size,
- int input_high_size,
- int output_low_size,
- int output_high_size,
- int W,
- int H,
- const __local half* __restrict src_local,
- const __local half* __restrict dst_local)
+__kernel void __dma_postwrite_quantize(__global half const *const restrict src,
+ __global half const *const restrict input_low,
+ __global half const *const restrict input_high,
+ __global half const *const restrict output_low,
+ __global half const *const restrict output_high,
+ __global half *const restrict dst,
+ int levels,
+ int input_low_size,
+ int input_high_size,
+ int output_low_size,
+ int output_high_size,
+ int W,
+ int C,
+ __local half const *const restrict local_src,
+ __local half const *const restrict local_dst)
{
- const int sizePlane = W*H;
- async_work_group_copy(dst + get_group_id(0)*sizePlane ,dst_local, sizePlane, 0);
+ WorkGroupDmaCreateStrideTransaction(
+ local_dst, // src
+ dst + get_group_id(1) * get_local_size(1) * W, // dst
+ W * sizeof(half), // src_width,
+ W * sizeof(half), // dst_width,
+ W * sizeof(half), // src_stride,
+ get_global_size(1) * W * sizeof(half), // dst_stride,
+ W * C * sizeof(half), // size
+ 0);
}
-__kernel void quantize(const __global half* __restrict src,
- const __global half* __restrict input_low,
- const __global half* __restrict input_high,
- const __global half* __restrict output_low,
- const __global half* __restrict output_high,
- const __global half* __restrict dst,
- int levels,
- int input_low_size,
- int input_high_size,
- int output_low_size,
- int output_high_size,
- int W,
- int H,
- const __local half* __restrict src_local,
- __local half* __restrict dst_local)
+__kernel void quantize(__global half const *const restrict src,
+ __global half const *const restrict input_low,
+ __global half const *const restrict input_high,
+ __global half const *const restrict output_low,
+ __global half const *const restrict output_high,
+ __global half const *const restrict dst,
+ int levels,
+ int input_low_size,
+ int input_high_size,
+ int output_low_size,
+ int output_high_size,
+ int W,
+ int C,
+ __local half const *const restrict local_src,
+ __local half *const restrict local_dst)
{
-
- int c = get_global_id(0);
-
- int C = get_global_size(0);
-
- half h_ilow = (input_low_size == 1 ? input_low[0] : input_low[c]);
- half h_ihigh = (input_high_size == 1 ? input_high[0] : input_high[c]);
- half h_olow = (output_low_size == 1 ? output_low[0] : output_low[c]);
- half h_ohigh = (output_high_size == 1 ? output_high[0] : output_high[c]);
-
- half8 h_ilow8 = h_ilow;
- half8 h_ihigh8 = h_ihigh;
- half8 h_olow8 = h_olow;
- half8 h_ohigh8 = h_ohigh;
-
- float f_ilow = convert_float(h_ilow);
- float f_ihigh = convert_float(h_ihigh);
- float f_olow = convert_float(h_olow);
- float f_ohigh = convert_float(h_ohigh);
-
- float8 f_ilow8 = f_ilow;
- float8 f_ihigh8 = f_ihigh;
- float8 f_olow8 = f_olow;
- float8 f_ohigh8 = f_ohigh;
-
- float const1 = !(f_ihigh - f_ilow) ? 0.0f : convert_float(levels - 1) / (f_ihigh - f_ilow);
- float const2 = !(levels - 1) ? 0.0f : (f_ohigh - f_olow) / convert_float(levels - 1);
-
- for (int h = 0; h < H; h++) {
- int idx = h*W;
-
- __local half* addr_src = (__local half*)src_local + idx;
- __local half* addr_dst = (__local half*)dst_local + idx;
-
- for (int w = 0; w < W / 8; w++) {
- half8 h_src_val8 = (*((__local half8*)addr_src + w));
- float8 f_src_val8 = convert_float8(h_src_val8);
-
- short8 aux_cond1;
- aux_cond1.s0 = (h_src_val8.s0 <= h_ilow);
- aux_cond1.s1 = (h_src_val8.s1 <= h_ilow);
- aux_cond1.s2 = (h_src_val8.s2 <= h_ilow);
- aux_cond1.s3 = (h_src_val8.s3 <= h_ilow);
- aux_cond1.s4 = (h_src_val8.s4 <= h_ilow);
- aux_cond1.s5 = (h_src_val8.s5 <= h_ilow);
- aux_cond1.s6 = (h_src_val8.s6 <= h_ilow);
- aux_cond1.s7 = (h_src_val8.s7 <= h_ilow);
- aux_cond1 *= aux_cond1;
-
- short8 aux_cond2;
- aux_cond2.s0 = (h_src_val8.s0 > h_ihigh);
- aux_cond2.s1 = (h_src_val8.s1 > h_ihigh);
- aux_cond2.s2 = (h_src_val8.s2 > h_ihigh);
- aux_cond2.s3 = (h_src_val8.s3 > h_ihigh);
- aux_cond2.s4 = (h_src_val8.s4 > h_ihigh);
- aux_cond2.s5 = (h_src_val8.s5 > h_ihigh);
- aux_cond2.s6 = (h_src_val8.s6 > h_ihigh);
- aux_cond2.s7 = (h_src_val8.s7 > h_ihigh);
- aux_cond2 *= aux_cond2;
-
- short8 aux_cond3 = (!aux_cond1 & aux_cond2);
- short8 aux_cond4 = (!aux_cond1 & !aux_cond2);
- aux_cond3 *= aux_cond3;
- aux_cond4 *= aux_cond4;
-
- half8 cond1 = convert_half8(aux_cond1);
- half8 cond2 = convert_half8(aux_cond2);
- half8 cond3 = convert_half8(aux_cond3);
- half8 cond4 = convert_half8(aux_cond4);
-
- half8 aux;
- aux = convert_half8(round(((f_src_val8 - f_ilow8) * (float8)const1)) * (float8)const2 + f_olow8);
- half8 dst_val = (
- (h_olow8 * cond1) +
- (h_ohigh8 * cond3) +
- (aux * cond4)
- );
- *((__local half8*)addr_dst + w) = dst_val;
+ int h = get_global_id(1);
+ int H = get_global_size(1);
+
+ for (int c = 0; c < C; c++)
+ {
+ half h_ilow = (input_low_size == 1 ? input_low[0] : input_low[c]);
+ half h_ihigh = (input_high_size == 1 ? input_high[0] : input_high[c]);
+ half h_olow = (output_low_size == 1 ? output_low[0] : output_low[c]);
+ half h_ohigh = (output_high_size == 1 ? output_high[0] : output_high[c]);
+
+ half const1 = (half)(!(h_ihigh - h_ilow) ? 0.0f : convert_float(levels - 1) / (convert_float(h_ihigh) - convert_float(h_ilow)));
+ half const2 = (half)(!(levels - 1) ? 0.0f : (convert_float(h_ohigh) - convert_float(h_olow)) / convert_float(levels - 1));
+
+ __local const half* restrict addr_src = local_src + c*W;
+ __local half* restrict addr_dst = local_dst + c*W;
+
+ for (int w = 0; w < W / 8; w++)
+ {
+ half8 val = *((__local half8*)addr_src + w);
+#if 1
+ // round is too slow =( 902 b of code
+ //half8 aux = round((val - (half8)h_ilow) * (half8)const1);
+
+ half8 aux = (val - (half8)h_ilow) * (half8)const1 + (half8)0.5h;
+
+ aux = (half8){
+ (half)(short)(aux.s0),
+ (half)(short)(aux.s1),
+ (half)(short)(aux.s2),
+ (half)(short)(aux.s3),
+ (half)(short)(aux.s4),
+ (half)(short)(aux.s5),
+ (half)(short)(aux.s6),
+ (half)(short)(aux.s7)
+ };
+
+ aux = aux * (half8)const2 + (half8)h_olow;
+
+ // vector comparison add 756 b of assembly, so do in manually
+ // short8 a = val <= (half8)h_olow;
+ // short8 b = val > (half8)h_ohigh;
+
+ short8 a;
+ short8 b;
+ a.s0 = (val.s0 <= h_ilow);
+ a.s1 = (val.s1 <= h_ilow);
+ a.s2 = (val.s2 <= h_ilow);
+ a.s3 = (val.s3 <= h_ilow);
+ a.s4 = (val.s4 <= h_ilow);
+ a.s5 = (val.s5 <= h_ilow);
+ a.s6 = (val.s6 <= h_ilow);
+ a.s7 = (val.s7 <= h_ilow);
+
+ b.s0 = (val.s0 > h_ihigh);
+ b.s1 = (val.s1 > h_ihigh);
+ b.s2 = (val.s2 > h_ihigh);
+ b.s3 = (val.s3 > h_ihigh);
+ b.s4 = (val.s4 > h_ihigh);
+ b.s5 = (val.s5 > h_ihigh);
+ b.s6 = (val.s6 > h_ihigh);
+ b.s7 = (val.s7 > h_ihigh);
+
+ a = ~(a-(short8)1);
+ b = ~(b-(short8)1);
+
+ short8 c1 = (~a & b);
+ short8 c2 = (~a & ~b);
+
+ short8 res = a & as_short8((half8)h_olow)
+ | c1 & as_short8((half8)h_ohigh)
+ | c2 & as_short8(aux);
+
+ *((__local half8*)addr_dst + w) = as_half8(res);
+#else
+ *((__local half8*)addr_dst + w) = val;
+#endif
}
- for (int w = W & (~0x7); w < W; w++) {
- half h_src_val = addr_src[w];
- float f_src_val = convert_float(h_src_val);
- half dst_val;
-
- if (h_src_val <= h_ilow) {
- dst_val = h_olow;
- } else if (h_src_val > h_ihigh) {
- dst_val = h_ohigh;
- } else {
- dst_val = convert_half(round((f_src_val - f_ilow) * const1) * const2 + f_olow);
- }
- addr_dst[w] = dst_val;
+ for (int w = W & (~0x7); w < W; w++)
+ //for (int w = 0 ; w < W; w++)
+ {
+ half val = addr_src[w];
+#if 1
+ short a = val <= h_ilow; a = ~(a-1);
+ short b = val > h_ihigh; b = ~(b-1);
+
+ short c1 = (~a & b);
+ short c2 = (~a & ~b);
+
+ short res = a & as_short(h_olow)
+ | c1 & as_short(h_ohigh)
+ | c2 & as_short(((half)(round( (val - h_ilow) * const1) * const2) + h_olow));
+
+ addr_dst[w] = as_half(res);
+#else
+ addr_dst[w] = val;
+#endif
}
}
}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+__constant static half log_2_e = (half)1.442695040888963; // log2(exp(1.0))
+
+#define ALLOW_EARLY_RETURN 1
+
+#define USE_MANUAL_DMA 1
+
+#if USE_MANUAL_DMA
+
+// Logistic (sigmoid) activation of a single element:
+//   dst[offset] = 1 / (1 + e^(-src[offset]))
+// e^(-x) is computed as exp2(-x * log2(e)) because exp2 maps to hardware.
+static void inline logistic_activate(__local const half* restrict src,
+ __local half* restrict dst,
+ int offset)
+{
+ half val = src[offset];
+ val = 1.0h / (1.0h + exp2(val * -log_2_e));
+ dst[offset] = val;
+}
+
+// DMA preload companion of region_chw: copies the local_C channel rows of
+// output row h for one anchor box (work-group (h, box)) from global CHW
+// memory into local_src as a dense W-wide row per channel.
+// NOTE(review): WorkGroupDmaCreateStrideTransaction is a Myriad/SHAVE DMA
+// intrinsic; argument meanings are taken from the inline comments below.
+__kernel void __dma_preload_region_chw(
+ __global const half* restrict src,
+ __global half* restrict _0,
+ __local half* restrict local_src,
+ __local half* restrict _1,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ // Channels per anchor box: coords + objectness + class scores.
+ const int local_C = classes + coords + 1;
+ const int c = get_group_id(1)*local_C;
+ const int h = get_group_id(0);
+
+ WorkGroupDmaCreateStrideTransaction(
+ src + c*H*W + h*W, // src
+ local_src, // dst
+ W*sizeof(half), // src_width,
+ W*sizeof(half), // dst_width,
+ W*H*sizeof(half), // src_stride,
+ W*sizeof(half), // dst_stride,
+ W*local_C*sizeof(half), // size
+ 0);
+}
+
+// DMA postwrite companion of region_chw: mirror of the preload — flushes the
+// local_C processed rows from local_dst back to the same CHW location in dst.
+__kernel void __dma_postwrite_region_chw(
+ __global half* restrict _0,
+ __global half* restrict dst,
+ __local half* restrict _1,
+ __local const half* restrict local_dst,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ const int local_C = classes + coords + 1;
+ const int c = get_group_id(1)*local_C;
+ const int h = get_group_id(0);
+
+ WorkGroupDmaCreateStrideTransaction(
+ local_dst, // src
+ dst + c*H*W + h*W, // dst
+ W*sizeof(half), // src_width,
+ W*sizeof(half), // dst_width,
+ W*sizeof(half), // src_stride,
+ W*H*sizeof(half), // dst_stride,
+ W*local_C*sizeof(half), // size
+ 0);
+}
+
+// Region layer (CHW layout), DMA variant: works on local memory filled by
+// __dma_preload_region_chw and flushed by __dma_postwrite_region_chw.
+// One work-item per column w. Per anchor box the channel planes are:
+// 0,1 = x,y (sigmoid), 2,3 = w,h (copied), 4 = objectness (sigmoid),
+// then `classes` class scores (softmax or sigmoid, per doSoftmax).
+__kernel void region_chw(
+ __global half* restrict src_data,
+ __global half* restrict dst_data,
+ __local const half* restrict local_src,
+ __local half* restrict local_dst,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ const int w = get_local_id(0);
+
+#if ALLOW_EARLY_RETURN
+ // The work-group may be padded wider than W; excess lanes do nothing.
+ if (w >= W) return;
+#endif
+
+ __local const half *restrict src = local_src + w;
+ __local half *restrict dst = local_dst + w;
+
+ // The local buffer stores one dense W-wide row per channel plane.
+ const int stride = W;
+ logistic_activate(src, dst, 0*stride);
+ logistic_activate(src, dst, 1*stride);
+
+ //copy plane 2 and 3 (box w, h pass through unchanged)
+ dst[2*stride] = src[2*stride];
+ dst[3*stride] = src[3*stride];
+
+ logistic_activate(src, dst, 4*stride);
+
+ // Advance past coords + objectness to the class-score planes.
+ src += (coords + 1)*stride;
+ dst += (coords + 1)*stride;
+
+ if (doSoftmax)
+ {
+ // Numerically stable softmax: subtract the max before exponentiating.
+ half max_val = src[0];
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ max_val = max(max_val, src[c*stride]);
+ }
+
+ half expSum = 0.0h;
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ const half e = src[c*stride] - max_val;
+ const half tmp = exp2(e * log_2_e);
+ dst[c*stride] = tmp;
+ expSum += tmp;
+ }
+
+ // Normalize with one reciprocal instead of `classes` divisions.
+ const half invExpSum = 1.0h / expSum;
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ dst[c*stride] *= invExpSum;
+ }
+ }
+ else
+ {
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ logistic_activate(src, dst, c*stride);
+ }
+ }
+}
+
+// DMA preload companion of region_hwc: gathers one anchor box's local_C
+// interleaved channels from an HWC source row into a dense local buffer
+// (local_C values per pixel, W pixels).
+__kernel void __dma_preload_region_hwc(
+ __global const half* restrict src,
+ __global half* restrict _0,
+ __local half* restrict local_src,
+ __local half* restrict _1,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ const int local_C = classes + coords + 1;
+ const int c = get_group_id(1)*local_C;
+ const int h = get_group_id(0);
+ // Without softmax the effective number of anchor boxes is the mask size.
+ if (!doSoftmax) num = maskSize;
+ const int C = local_C*num;
+
+ WorkGroupDmaCreateStrideTransaction(
+ src + h*W*C + c, // src
+ local_src, // dst
+ local_C*sizeof(half), // src_width,
+ local_C*sizeof(half), // dst_width,
+ C*sizeof(half), // src_stride,
+ local_C*sizeof(half), // dst_stride,
+ local_C*W*sizeof(half), // size
+ 0);
+}
+
+// DMA postwrite companion of region_hwc.
+__kernel void __dma_postwrite_region_hwc(
+ __global half* restrict _0,
+ __global half* restrict dst,
+ __local half* restrict _1,
+ __local const half* restrict local_dst,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ // Region always outputs in CHW layout; same as postwrite_chw
+ const int local_C = classes + coords + 1;
+ const int c = get_group_id(1)*local_C;
+ const int h = get_group_id(0);
+
+ WorkGroupDmaCreateStrideTransaction(
+ local_dst, // src
+ dst + c*H*W + h*W, // dst
+ W*sizeof(half), // src_width,
+ W*sizeof(half), // dst_width,
+ W*sizeof(half), // src_stride,
+ W*H*sizeof(half), // dst_stride,
+ W*local_C*sizeof(half), // size
+ 0);
+}
+
+// Sigmoid of src[offset], stored at dst[offset*stride]: reads an HWC-packed
+// element (channels contiguous) and writes it into a CHW-packed plane.
+static void inline logistic_activate_hwc(__local const half* restrict src,
+ __local half* restrict dst,
+ int offset,
+ int stride)
+{
+ half val = src[offset];
+ val = 1.0h / (1.0h + exp2(val * -log_2_e));
+ dst[offset*stride] = val;
+}
+
+// Region layer for an HWC-packed local input (local_C channels contiguous
+// per pixel), producing CHW-packed local output. One work-item per column w.
+// Same per-box plane semantics as region_chw above.
+__kernel void region_hwc(
+ __global half* restrict src_data,
+ __global half* restrict dst_data,
+ __local const half* restrict local_src,
+ __local half* restrict local_dst,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ const int w = get_local_id(0);
+
+#if ALLOW_EARLY_RETURN
+ if (w >= W) return;
+#endif
+
+ const int local_C = classes + coords + 1;
+
+ // Input: channels contiguous per pixel. Output: W-strided channel planes.
+ __local const half *restrict src = local_src + w*local_C;
+ __local half *restrict dst = local_dst + w;
+
+ const int stride = W;
+ logistic_activate_hwc(src, dst, 0, stride);
+ logistic_activate_hwc(src, dst, 1, stride);
+
+ //copy plane 2 and 3 (box w, h pass through unchanged)
+ dst[2*stride] = src[2];
+ dst[3*stride] = src[3];
+
+ logistic_activate_hwc(src, dst, 4, stride);
+
+ // Advance past coords + objectness to the class scores.
+ src += coords + 1;
+ dst += (coords + 1)*stride;
+
+ if (doSoftmax)
+ {
+ // Numerically stable softmax over the class scores.
+ half max_val = src[0];
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ max_val = max(max_val, src[c]);
+ }
+
+ half expSum = 0.0h;
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ const half e = src[c] - max_val;
+ const half tmp = exp2(e * log_2_e);
+ dst[c*stride] = tmp;
+ expSum += tmp;
+ }
+
+ const half invExpSum = 1.0h / expSum;
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ dst[c*stride] *= invExpSum;
+ }
+ }
+ else
+ {
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ logistic_activate_hwc(src, dst, c, stride);
+ }
+ }
+}
+
+#else // defined (USE_MANUAL_DMA)
+
+#define NUM_CLASSES 80
+
+// Non-DMA variant of logistic_activate: identical sigmoid, but reads and
+// writes global memory directly instead of a local staging buffer.
+static void inline logistic_activate(__global const half* restrict src,
+ __global half* restrict dst,
+ int offset)
+{
+ half val = src[offset];
+ val = 1.0h / (1.0h + exp2(val * -log_2_e));
+ dst[offset] = val;
+}
+
+// Region layer (CHW), non-DMA variant: addresses global memory directly;
+// the __local arguments exist only to keep the kernel signature uniform.
+// Softmax stages class scores in a private buffer instead of re-reading
+// global memory; assumes classes <= NUM_CLASSES (80) — no bounds check.
+__kernel void region_chw(
+ __global const half* restrict global_src,
+ __global half* restrict global_dst,
+ __local half* restrict _0,
+ __local half* restrict _1,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ const int w = get_local_id(0);
+
+#if ALLOW_EARLY_RETURN
+ if (w >= W) return;
+#endif
+
+ const int local_C = classes + coords + 1;
+ const int c = get_group_id(1)*local_C;
+ const int h = get_group_id(0);
+
+ __global const half *restrict src = global_src + c*H*W + h*W + w;
+ __global half *restrict dst = global_dst + c*H*W + h*W + w;
+
+ // Full-tensor plane stride, since we address global CHW memory directly.
+ const int stride = H*W;
+ logistic_activate(src, dst, 0*stride);
+ logistic_activate(src, dst, 1*stride);
+
+ //copy plane 2 and 3 (box w, h pass through unchanged)
+ dst[2*stride] = src[2*stride];
+ dst[3*stride] = src[3*stride];
+
+ logistic_activate(src, dst, 4*stride);
+
+ src += (coords + 1)*stride;
+ dst += (coords + 1)*stride;
+
+ if (doSoftmax)
+ {
+ __private half data[NUM_CLASSES];
+
+ // Stable softmax: cache values and find the max in one pass.
+ half max_val = src[0];
+ for (int c = 0; c < classes; c++)
+ {
+ half tmp = src[c*stride];
+ data[c] = tmp;
+ max_val = max(max_val, tmp);
+ }
+
+ // NOTE(review): half_exp appears to be a platform intrinsic — confirm.
+ half expSum = 0.0h;
+ for (int c = 0; c < classes; c++)
+ {
+ half tmp = half_exp(data[c] - max_val);
+ data[c] = tmp;
+ expSum += tmp;
+ }
+
+ for (int c = 0; c < classes; c++)
+ {
+ dst[c*stride] = data[c] / expSum;
+ }
+ }
+ else
+ {
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ logistic_activate(src, dst, c*stride);
+ }
+ }
+}
+
+// Non-DMA variant of logistic_activate_hwc: sigmoid of an HWC-packed global
+// element written to a CHW-packed global plane at dst[offset*stride].
+static void inline logistic_activate_hwc(__global const half* restrict src,
+ __global half* restrict dst,
+ int offset,
+ int stride)
+{
+ half val = src[offset];
+ val = 1.0h / (1.0h + exp2(val * -log_2_e));
+ dst[offset*stride] = val;
+}
+
+
+// Region layer, non-DMA variant: HWC global input, CHW global output.
+// Same plane semantics as region_chw; softmax stages class scores in a
+// private buffer (assumes classes <= NUM_CLASSES, unchecked).
+__kernel void region_hwc(
+ __global const half* restrict global_src,
+ __global half* restrict global_dst,
+ __local half* restrict _0,
+ __local half* restrict _1,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ const int w = get_local_id(0);
+
+#if ALLOW_EARLY_RETURN
+ if (w >= W) return;
+#endif
+
+ const int local_C = classes + coords + 1;
+ const int c = get_group_id(1)*local_C;
+ const int h = get_group_id(0);
+ const int C = num*local_C;
+
+ // src walks interleaved HWC channels; dst walks CHW channel planes.
+ __global const half *restrict src = global_src + h*W*C + w*C + c;
+ __global half *restrict dst = global_dst + c*H*W + h*W + w;
+
+ const int stride = H*W;
+ logistic_activate_hwc(src, dst, 0, stride);
+ logistic_activate_hwc(src, dst, 1, stride);
+
+ //copy plane 2 and 3 (box w, h pass through unchanged)
+ dst[2*stride] = src[2];
+ dst[3*stride] = src[3];
+
+ logistic_activate_hwc(src, dst, 4, stride);
+
+ src += coords + 1;
+ dst += (coords + 1)*stride;
+
+ if (doSoftmax)
+ {
+ __private half data[NUM_CLASSES];
+
+ // Stable softmax: cache values and find the max in one pass.
+ half max_val = src[0];
+ for (int c = 0; c < classes; c++)
+ {
+ half tmp = src[c];
+ data[c] = tmp;
+ max_val = max(max_val, tmp);
+ }
+
+ half expSum = 0.0h;
+ for (int c = 0; c < classes; c++)
+ {
+ half tmp = half_exp(data[c] - max_val);
+ data[c] = tmp;
+ expSum += tmp;
+ }
+
+ for (int c = 0; c < classes; c++)
+ {
+ dst[c*stride] = data[c] / expSum;
+ }
+ }
+ else
+ {
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ logistic_activate_hwc(src, dst, c, stride);
+ }
+ }
+}
+
+#endif // defined (USE_MANUAL_DMA)
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-__kernel void reorg_NCHW(__global const half* restrict src,
- __global half* restrict out,
- int H,
- int W,
- int stride)
+#define USE_MANUAL_DMA
+
+#if defined (USE_MANUAL_DMA)
+
+// DMA preload for reorg_chw: each work-group (channel block, stride row)
+// copies get_local_size(0) rows of W*stride elements from global memory into
+// local_src, collapsing the global src stride into a dense local layout.
+__kernel void __dma_preload_reorg_chw(__global half const *restrict src,
+ __global half *restrict dst,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half *restrict local_src,
+ __local half *restrict local_dst
+ )
+{
+ const int stride_y = get_group_id(1);
+
+ const int srcIdx = stride_y*W*stride + W*stride*stride*get_group_id(0);
+
+ WorkGroupDmaCreateStrideTransaction(
+ src + srcIdx, // src
+ local_src, // dst
+ W * stride * sizeof(half), // src width
+ W * stride * sizeof(half), // dst width
+ W * stride * stride * get_num_groups(0) * sizeof(half), // src stride
+ W * stride * sizeof(half), // dst stride
+ W * stride * get_local_size(0) * sizeof(half), //total size
+ 0);
+}
+
+// DMA postwrite for reorg_chw: scatters the dense local_dst rows back to
+// global memory with a W*get_num_groups(0) destination stride.
+__kernel void __dma_postwrite_reorg_chw(__global half const *restrict src,
+ __global half *restrict dst,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half *restrict local_src,
+ __local half const *restrict local_dst
+ )
+{
+ const int stride_y = get_group_id(1);
+
+ const int dstIdx = stride_y*W*stride*get_global_size(0) + get_group_id(0)*W;
+
+ WorkGroupDmaCreateStrideTransaction(
+ local_dst, // src
+ dst + dstIdx, // dst
+ W * sizeof(half), // src width
+ W * sizeof(half), // dst width
+ W * sizeof(half), // src stride
+ W * get_num_groups(0) * sizeof(half), // dst stride
+ get_local_size(0) * W * stride * sizeof(half), //total size
+ 0);
+}
+
+__kernel void reorg_chw(__global half const *restrict src,
+ __global half *restrict dst,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half *restrict local_src,
+ __local half *restrict local_dst
+ )
{
- int h = min((int)get_global_id(0), H-1);
+ const int c = get_local_id(0);
+ const int stride_x = get_local_id(1);
- int c = get_global_id(1);
- int C = get_global_size(1);
- int C2 = C/(stride*stride);
+ const int srcIdx = stride_x + c*W*stride;
+ const int dstIdx = stride_x*W*get_local_size(0) + c*W;
- int offset = c / C2;
+ int x = 0;
+ for (; x <= W - 8; x += 8) {
+ half8 data = (half8) {
+ local_src[srcIdx + (x + 0)*stride], local_src[srcIdx + (x + 1)*stride],
+ local_src[srcIdx + (x + 2)*stride], local_src[srcIdx + (x + 3)*stride],
+ local_src[srcIdx + (x + 4)*stride], local_src[srcIdx + (x + 5)*stride],
+ local_src[srcIdx + (x + 6)*stride], local_src[srcIdx + (x + 7)*stride]
+ };
- int c2 = c - C2 * offset;
+ *((__local half8*)(&local_dst[dstIdx + x])) = data;
+ }
- int H2 = H*stride;
- int W2 = W*stride;
+ for (; x < W; x++) {
+ local_dst[dstIdx + x] = local_src[srcIdx + x*stride];
+ }
+}
- for (int w = 0; w < W; ++w)
- {
- int h2 = h*stride + offset / stride;
- int w2 = w*stride + offset - stride * (offset / stride);
+#else
- out[W*H*c + W*h + w] = src[W2*H2*c2 + W2*h2 + w2];
+__kernel void reorg_chw(__global half const *restrict src,
+ __global half *restrict dst,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half const *restrict _0,
+ __local half *restrict _1
+ )
+{
+ const int stride_x = get_local_id(1);
+ const int stride_y = get_group_id(1);
+ const int N = get_global_size(0);
+ const int c = get_local_id(0)*get_num_groups(0) + get_group_id(0);
+
+ const int srcIdx = c*W*stride*stride + stride_x + stride_y*W*stride;
+ const int dstIdx = c*W + stride_x*W*N + stride_y*W*N*stride;
+
+ #pragma unroll 8
+ for (int x = 0; x < W; x++) {
+ dst[dstIdx + x] = src[srcIdx + x*stride];
}
}
+
+#endif
+
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#define MIN(v1, v2) ((v1) < (v2) ? (v1) : (v2))
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-__kernel void reorg(__global half* restrict src,
- __global half* restrict out,
- int h,
- int w,
- int stride)
+__kernel void __dma_preload_reorg_hwc(__global half const *restrict src,
+ __global half *restrict _0,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half *restrict local_src,
+ __local half *restrict _1
+ )
{
- int j = MIN(get_global_id(0), h-1);
+ const int stride_x = get_group_id(1);
+
+ WorkGroupDmaCreateStrideTransaction(
+ src + get_group_id(0) * stride + stride_x * C, // src
+ local_src, // dst
+ stride * sizeof(half), // src_width,
+ stride * sizeof(half), // dst_width,
+ C * stride * sizeof(half), // src_stride,
+ stride * sizeof(half), // dst_stride,
+ H * W * sizeof(half), // size
+ 0);
+}
- int k = get_global_id(1);
- int c = get_global_size(1);
+__kernel void __dma_postwrite_reorg_hwc(__global half const *restrict _0,
+ __global half *restrict dst,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half *restrict _1,
+ __local half *restrict local_dst
+ )
+{
+ const int stride_x = get_group_id(1);
- int out_c = c / (stride * stride);
- int oc = c * (stride * stride);
- int oh = h / stride;
- int ow = w / stride;
+ WorkGroupDmaCreateStrideTransaction(
+ local_dst, // src
+ dst + stride_x * C + get_group_id(0) * stride, // dst
+ stride * sizeof(half), // src_width,
+ stride * sizeof(half), // dst_width,
+ stride * sizeof(half), // src_stride,
+ C * stride * sizeof(half), // dst_stride,
+ W * H * sizeof(half), // size
+ 0);
+}
- int in_index = w * (j + h*k);
+__kernel void reorg_hwc(__global half const *restrict src,
+ __global half *restrict dst,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half *restrict local_src,
+ __local half *restrict local_dst
+ )
+{
+ const int stride_y = get_local_id(1);
+ const int blocks = get_local_size(0);
+ const int b = get_local_id(0);
- int new_z = in_index / (oh*ow);
- int new_y = (in_index %(oh*ow)) / ow;
- int new_x = (in_index %(oh*ow)) % ow;
- int new_index = new_z + new_x * oc + new_y * oc * ow;
+ const int OC = stride * stride;
+ const int OH = H / stride;
+ const int OW = W / stride;
+ const int IC = stride;
+ const int IH = H;
+ const int IW = W / stride;
- in_index++;
+ for (int block_h = 0; block_h < stride; block_h++) {
+ const int src_line = b * stride * stride + stride_y * stride + block_h;
+ const int c = src_line / IH;
+ const int h = src_line % IH;
- int c2 = k % out_c;
- int offset = k / out_c;
- int w2 = 0 * stride + offset % stride;
- int h2 = j * stride + offset / stride;
- int out_index = w2 + w * stride * (h2 + h * stride * c2);
+ const int dst_line = b * stride + stride_y * blocks * stride + block_h;
+ const int oc = dst_line / OH;
+ const int oh = dst_line % OH;
- for (int i = 0; i < w; ++i, out_index+=stride, in_index++)
+ for (int w = 0; w < W / stride; w++) {
+ local_dst[oh*OW*OC + w*OC + oc] = local_src[h*IW*IC + w*IC + c];
+ }
+ }
+}
+
+__kernel void reorg_hwc_naive(__global half const *restrict src,
+ __global half *restrict dst,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half *restrict local_src,
+ __local half *restrict local_dst
+ )
+{
+ const int out_c = C / (stride * stride);
+ const int oc = C * (stride * stride);
+ const int oh = H / stride;
+ const int ow = W / stride;
+
+ const int c = get_global_id(0);
+
+ for (int h = 0; h < H; ++h)
{
- // repacking coordinates
- int k0 = out_index / (h*w);
- int j0 = (out_index % (h*w)) / w;
- int i0 = (out_index % (h*w)) % w;
- int out_index_repack = k0 + c * i0 + c * w * j0;
- out[new_index] = src[out_index_repack];
-
- int new_z = in_index / (oh*ow);
+ int in_index = W * (h + H*c) + (0);
+ int new_z = in_index / (oh*ow);
int new_y = (in_index %(oh*ow)) / ow;
int new_x = (in_index %(oh*ow)) % ow;
- new_index = new_z + new_x * oc + new_y * oc * ow;
+ int new_index = new_z + new_x * oc + new_y * oc * ow;
+
+ in_index++;
+
+ int c2 = c % out_c;
+ int offset = c / out_c;
+ int w2 = 0 * stride + offset % stride;
+ int h2 = h * stride + offset / stride;
+ int out_index = w2 + W * stride * (h2 + H * stride * c2);
+
+ #pragma unroll 2
+ for(int i = 0; i < W; ++i, out_index+=stride, in_index++)
+ {
+ // repacking coordinates
+ int k0 = out_index / (H*W);
+ int j0 = (out_index % (H*W)) / W;
+ int i0 = (out_index % (H*W)) % W;
+ int out_index_repack = k0 + C * i0 + C * W * j0;
+
+ dst[new_index] = src[out_index_repack];
+
+ int new_z = in_index / (oh*ow);
+ int new_y = (in_index %(oh*ow)) / ow;
+ int new_x = (in_index %(oh*ow)) % ow;
+ new_index = new_z + new_x * oc + new_y * oc * ow;
+ }
}
}
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#define ROUND(x) (int)(round(x))
#endif
+// Map an output coordinate to its source input coordinate for nearest-
+// neighbor resampling; f is the input/output scale (1/factor at call sites).
+// Truncating (ox + 0.5)*f picks the input pixel covering the output center.
+inline int out_to_in(float ox, float f) {
+ return (int)((ox + 0.5f) * f);
+}
+
+#define USE_MANUAL_DMA
+
+#if defined (USE_MANUAL_DMA)
+
+// Nearest-neighbor horizontal resample of C rows (CHW layout, local memory):
+// pdst[c][w] = psrc[c][nearest(w)], vectorized 8 output pixels at a time.
+// __builtin_shave_cmu_min_i32_rr_int is presumably a SHAVE min intrinsic
+// clamping the sampled index to IW-1 — TODO confirm.
+// NOTE(review): alpha is derived from rh but applied along w; the sole
+// caller passes rw == rh so this is equivalent — confirm before reusing.
+void interpolationCHW_nn(__local half* psrc, __local half* pdst, int OW, int IW, int C, float rw, float rh)
+{
+ float alpha = rh / 2.0f - 0.5f;
+
+ for (int w = 0; w < OW/8; w++)
+ {
+ // Source x positions for 8 consecutive output pixels.
+ float fw0 = rw*(w*8+0) + alpha;
+ float fw1 = rw*(w*8+1) + alpha;
+ float fw2 = rw*(w*8+2) + alpha;
+ float fw3 = rw*(w*8+3) + alpha;
+
+ float fw4 = rw*(w*8+4) + alpha;
+ float fw5 = rw*(w*8+5) + alpha;
+ float fw6 = rw*(w*8+6) + alpha;
+ float fw7 = rw*(w*8+7) + alpha;
+
+ // Round to the nearest input column, clamped to the last column.
+ int iw0 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw0), IW-1);
+ int iw1 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw1), IW-1);
+ int iw2 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw2), IW-1);
+ int iw3 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw3), IW-1);
+
+ int iw4 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw4), IW-1);
+ int iw5 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw5), IW-1);
+ int iw6 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw6), IW-1);
+ int iw7 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw7), IW-1);
+
+ // Indices are shared across channels: gather and store per channel row.
+ for (int c = 0; c < C; c++)
+ {
+ half8 val = {
+ *((__local half*)(psrc + c * IW + iw0)),
+ *((__local half*)(psrc + c * IW + iw1)),
+
+ *((__local half*)(psrc + c * IW + iw2)),
+ *((__local half*)(psrc + c * IW + iw3)),
+
+ *((__local half*)(psrc + c * IW + iw4)),
+ *((__local half*)(psrc + c * IW + iw5)),
+
+ *((__local half*)(psrc + c * IW + iw6)),
+ *((__local half*)(psrc + c * IW + iw7)),
+ };
+ *((__local half8*)(pdst + c * OW + w*8)) = val;
+ }
+ }
+
+ // Scalar tail for OW not divisible by 8.
+ for (int w = OW/8*8; w < OW; w++)
+ {
+ float fw = rw*w + alpha;
+ int iw0 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw), IW-1);
+
+ for (int c = 0; c < C; c++)
+ {
+ *((__local half*)(pdst + c * OW + w)) = *((__local half*)(psrc + c * IW + iw0));
+ }
+ }
+}
+
+// DMA preload for resample_nearest: computes the input-row span [iy_first,
+// iy_last] needed by this work-group's output rows and copies it for all
+// channels into local_src.
+__kernel void __dma_preload_resample_nearest(__global const half* restrict src,
+ __global half* restrict _0,
+ __local half* restrict local_src,
+ __local half* restrict _1,
+ int iw,
+ int ih,
+ float factor,
+ int ow,
+ int oh,
+ int channels)
+{
+ const int oy_first = get_group_id(1) * get_local_size(1);
+ const int oy_last = (get_group_id(1) + 1) * get_local_size(1) - 1;
+ const int iy_first = out_to_in(oy_first, 1.0 / factor);
+ const int iy_last = out_to_in(oy_last, 1.0 /factor);
+ const int iy_size = iy_last - iy_first + 1;
+
+ WorkGroupDmaCreateStrideTransaction(
+ src + get_group_id(2)*channels*ih*iw + iy_first*iw, // src
+ local_src, // dst
+ iy_size * iw * sizeof(half), // src_width,
+ iy_size * iw * sizeof(half), // dst_width,
+ ih * iw * sizeof(half), // src_stride,
+ iy_size * iw * sizeof(half), // dst_stride,
+ channels * iy_size * iw * sizeof(half), // size
+ 0);
+}
+
+// DMA postwrite for resample_nearest: flushes this work-group's block of
+// output rows for all channels from local_dst back to global dst.
+__kernel void __dma_postwrite_resample_nearest(__global const half* restrict _0,
+ __global half* restrict dst,
+ __local half* restrict _1,
+ __local half* restrict local_dst,
+ int iw,
+ int ih,
+ float factor,
+ int ow,
+ int oh,
+ int channels)
+{
+
+ WorkGroupDmaCreateStrideTransaction(
+ local_dst, // src
+ dst + get_group_id(2)*channels*get_global_size(1)*ow + get_group_id(1)*get_local_size(1)*ow, // dst
+ get_local_size(1) * ow * sizeof(half), // src_width,
+ get_local_size(1) * ow * sizeof(half), // dst_width,
+ get_local_size(1) * ow * sizeof(half), // src_stride,
+ get_global_size(1) * ow * sizeof(half), // dst_stride,
+ channels * get_local_size(1) * ow * sizeof(half), // size
+ 0);
+}
+
kernel void resample_nearest(__global const half* restrict src,
__global half* restrict dst,
+ __local half* restrict local_src,
+ __local half* restrict local_dst,
int iw,
int ih,
float factor,
int oh,
int channels)
{
- int oy = min((int)get_global_id(0), oh-1);
- int c = get_global_id(1);
- int b = get_global_id(2);
-
- float fx = 1.f / factor;
- float fy = 1.f / factor;
-
- __global const half* start_src = src + b * iw * ih * channels + iw * ih * c;
- __global half* start_dst = dst + b * ow * oh * channels + ow * oh * c;
-
- for (int ox = 0; ox < ow; ox++)
- {
- float ix_r0 = ox*fx + fx / 2.0f - 0.5f;
- float iy_r0 = oy*fy + fy / 2.0f - 0.5f;
+ interpolationCHW_nn(local_src, local_dst, ow, iw, channels, 1.0 / factor, 1.0 / factor);
+}
- int ix_r1 = ROUND(ix_r0);
- int iy_r1 = ROUND(iy_r0);
+#else // defined (USE_MANUAL_DMA)
- ix_r1 = max(ix_r1, 0);
- ix_r1 = min(ix_r1, iw - 1);
+kernel void resample_nearest(__global const half* restrict src,
+ __global half* restrict dst,
+ __local half* restrict local_src,
+ __local half* restrict local_dst,
+ int iw,
+ int ih,
+ float factor,
+ int ow,
+ int oh,
+ int channels)
+{
+ const float inv_factor = 1.0f / factor;
+ const int iy = out_to_in(get_global_id(1), inv_factor);
- iy_r1 = max(iy_r1, 0);
- iy_r1 = min(iy_r1, ih - 1);
+ __global half* dst_data = dst + get_global_id(1)*ow;
+ __global half* src_data = src + iy*iw;
- start_dst[oy * ow + ox] = start_src[iy_r1 * iw + ix_r1];
+ for (int ox = 0; ox < ow; ++ox)
+ {
+ const int ix = out_to_in(ox, inv_factor);
+ for (int c = 0; c < channels; c++) {
+ dst_data[c*oh*ow + ox] = src_data[c*ih*iw + ix];
+ }
}
}
+
+#endif // defined (USE_MANUAL_DMA)
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#define USE_OPTIMIZED_ROUND
+
+#ifdef USE_OPTIMIZED_ROUND
+ #define ROUND(x) ((int)((x) + 0.5f))
+#else
+ #define ROUND(x) (int)(round(x))
+#endif
+
+
+// Map an output coordinate to its input coordinate; here f is the resize
+// factor itself (output/input), so the position is divided by f.
+// The optimized path folds round(x - 0.5) into a plain truncation.
+inline int out_to_in(float ox, float f) {
+#ifdef USE_OPTIMIZED_ROUND
+ return (int)((ox + 0.5f) / f);
+#else
+ return ROUND((ox + 0.5f) / f - 0.5f);
+#endif
+}
+
static inline float triangleCoeff(float x)
{
- return 1.0f - fabs(x);//fmax(0.0f, 1 - fabs(x));
+ return 1.0f - fabs(x);
}
static inline float4 triangleCoeff4(float4 x)
{
- return 1.0f - fabs(x);//fmax(0.0f, 1 - fabs(x));
+ return 1.0f - fabs(x);
+}
+
+// Triangle (tent) filter weight, half precision: 1 - |x|.
+// NOTE(review): like the float version above, values for |x| > 1 are not
+// clamped to 0 — callers must keep x within the filter support.
+static inline half triangleCoeffHalf(half x)
+{
+ return 1.0h - fabs(x);
+}
+
+// 4-wide vector form of triangleCoeffHalf (no clamping for |x| > 1).
+static inline half4 triangleCoeffHalf4(half4 x)
+{
+ return 1.0h - fabs(x);
+}
+
+// 8-wide vector form of triangleCoeffHalf (no clamping for |x| > 1).
+static inline half8 triangleCoeffHalf8(half8 x)
+{
+ return 1.0h - fabs(x);
+}
+
+#define USE_MANUAL_DMA
+
+#if defined (USE_MANUAL_DMA)
+
+// DMA preload for resample_with_antialias: r is the filter support radius
+// (2 when upscaling, ceil(1/factor) when downscaling). Copies the padded
+// input-row span [iy_first, iy_last] for this group's channels and rows.
+__kernel void __dma_preload_resample_with_antialias(__global const half* restrict src,
+ __global half* restrict _0,
+ __local half* restrict local_src,
+ __local half* restrict _1,
+ int iw,
+ int ih,
+ float factor,
+ int ow,
+ int oh,
+ int channels)
+{
+ const int r = (factor > 1.0f) ? 2 : ceil(1.0f / factor);
+ const int oy_first = get_group_id(1) * get_local_size(1);
+ const int oy_last = (get_group_id(1) + 1) * get_local_size(1) - 1;
+ const int iy_first = max(out_to_in(oy_first, factor) - r, 0);
+ const int iy_last = min(out_to_in(oy_last, factor) + r, ih - 1);
+ const int iy_size = iy_last - iy_first + 1;
+
+ WorkGroupDmaCreateStrideTransaction(
+ src + get_group_id(2)*get_local_size(2)*ih*iw + iy_first*iw, // src
+ local_src, // dst
+ iy_size * iw * sizeof(half), // src_width,
+ iy_size * iw * sizeof(half), // dst_width,
+ ih * iw * sizeof(half), // src_stride,
+ iy_size * iw * sizeof(half), // dst_stride,
+ get_local_size(2) * iy_size * iw * sizeof(half), // size
+ 0);
+}
+
+// DMA postwrite for resample_with_antialias: flushes this group's block of
+// filtered output rows for its channels from dst_local back to global dst.
+__kernel void __dma_postwrite_resample_with_antialias(__global const half* restrict _0,
+ __global half* restrict dst,
+ __local half* restrict _1,
+ __local half* restrict dst_local,
+ int iw,
+ int ih,
+ float factor,
+ int ow,
+ int oh,
+ int channels)
+{
+ WorkGroupDmaCreateStrideTransaction(
+ dst_local, // src
+ dst + get_group_id(2)*get_local_size(2)*get_global_size(1)*ow + get_group_id(1)*get_local_size(1)*ow, // dst
+ get_local_size(1) * ow * sizeof(half), // src_width,
+ get_local_size(1) * ow * sizeof(half), // dst_width,
+ get_local_size(1) * ow * sizeof(half), // src_stride,
+ get_global_size(1) * ow * sizeof(half), // dst_stride,
+ get_local_size(2) * get_local_size(1) * ow * sizeof(half), // size
+ 0);
+}
+// Compute kernel (manual-DMA build): one work-item produces one output row
+// (oy = global id 1) using the preloaded local_src window; results go to
+// local_dst and are flushed by the postwrite kernel.
+// NOTE(review): `ow` is used below but does not appear in this hunk's visible
+// parameter list — verify the unchanged context lines carry it.
__kernel void resample_with_antialias(const __global half* restrict src,
__global half* restrict dst,
+ __local half* restrict local_src,
+ __local half* restrict local_dst,
int iw,
int ih,
float factor,
int oh,
int channels)
{
- int oy = min((int)get_global_id(0), oh-1);
- int c = get_global_id(1);
- int b = get_global_id(2);
+ const int r = (factor > 1.0f) ? 2 : ceil(1.0f / factor);
+ const int oy_first = get_group_id(1) * get_local_size(1);
+ const int oy_last = (get_group_id(1) + 1) * get_local_size(1) - 1;
+ const int iy_first = max(out_to_in(oy_first, factor) - r, 0);
+ const int iy_last = min(out_to_in(oy_last, factor) + r, ih - 1);
+ const int iy_size = iy_last - iy_first + 1;
+ const int oy = get_global_id(1);
+ // Source row center, expressed relative to the preloaded window origin iy_first.
+ const float iy_f = ((oy + 0.5f) / factor - 0.5f) - iy_first;
+ const int iy = ROUND(iy_f);
+
+ // NOTE(review): start_src/start_dst already add a get_local_id(1) row offset
+ // here, yet the store below indexes start_dst[get_local_id(1)*ow + ox] again —
+ // that looks like a double row offset relative to the postwrite DMA layout
+ // (dense local_size(1)*ow planes). Confirm against the postwrite kernel.
+ __local half const *restrict start_src = local_src + iw * get_local_id(1) + iw * iy_size * get_local_id(2);
+ __local half *restrict start_dst = local_dst + ow * get_local_id(1) + ow * get_local_size(1) * get_local_id(2);
+
+ for (int ox = 0; ox < ow; ox++)
+ {
+ const float ix_f = (float)((ox + 0.5f) / factor) - 0.5f;
+ const int ix_i = ROUND(ix_f);
+
+ float4 v_sum = 0.f;
+ float4 v_wsum = 0.f;
+ for (int y = 0; y < iy_size; y++)
+ {
+ float dy = iy_f - y;
+ int x = max(ix_i - r, 0);
+ int end_x = min(ix_i + r, iw - 1);
+
+ float4 dx;
+ for (int i = 0; i < 4; i++)
+ dx[i] = ix_f - x - i;
- float fx = 1.f / factor;
- float fy = 1.f / factor;
+ // NOTE(review): the non-DMA variant uses `x <= end_x - 3` here; `<` skips
+ // one vector iteration (the scalar tail still covers it, so results match,
+ // just slightly slower). Consider aligning the two variants.
+ for (; x < end_x - 3; x += 4, dx -= 4)
+ {
+ float4 w = factor*triangleCoeff4(factor*dx) * factor*triangleCoeff(factor*dy);
+ float4 src_vec = { start_src[y*iw + x + 0],
+ start_src[y*iw + x + 1],
+ start_src[y*iw + x + 2],
+ start_src[y*iw + x + 3] };
+
+ v_sum += w * src_vec;
+ v_wsum += w;
+ }
+
+ // Scalar tail for the remaining (< 4) taps.
+ for (; x <= end_x; x++)
+ {
+ float dx = ix_f - x;
+ float w = factor*triangleCoeff(factor*dx) * factor*triangleCoeff(factor*dy);
+
+ v_sum[0] += w * start_src[y*iw + x];
+ v_wsum[0] += w;
+ }
+ }
- float ax = 1.0f / fx;
- float ay = 1.0f / fy;
+ // Horizontal reduction of the 4 accumulator lanes.
+ v_sum[0] = v_sum[0] + v_sum[1] + v_sum[2] + v_sum[3];
+ v_wsum[0] = v_wsum[0] + v_wsum[1] + v_wsum[2] + v_wsum[3];
- int rx = (fx < 1.0f) ? 2 : ceil((1.0f)/ax);
- int ry = (fy < 1.0f) ? 2 : ceil((1.0f)/ay);
+ start_dst[get_local_id(1)*ow + ox] = (!v_wsum[0]) ? 0.0f : (half)(v_sum[0] / v_wsum[0]);
+ }
+}
- const __global half* restrict start_src = src + b * iw * ih * channels + iw * ih * c;
- __global half* restrict start_dst = dst + b * ow * oh * channels + ow * oh * c;
+#else
- float iy_r0 = oy*fy + fy / 2.0f - 0.5f;
- int iy_r1 = (int)(round(iy_r0));
+// Reference (non-DMA) variant: reads straight from global memory. `_0`/`_1`
+// keep the signature identical to the manual-DMA build so the host-side
+// dispatch does not change.
+__kernel void resample_with_antialias(const __global half* restrict src,
+ __global half* restrict dst,
+ __local half* restrict _0,
+ __local half* restrict _1,
+ int iw,
+ int ih,
+ float factor,
+ int ow,
+ int oh,
+ int channels)
+{
+ int oy = get_global_id(1);
+ int c = get_global_id(2);
+
+ // Filter radius: 2 taps when upscaling, ceil(1/factor) when downscaling.
+ int r = (factor > 1.0f) ? 2 : ceil((1.0f)/factor);
+
+ const __global half* restrict start_src = src + iw * ih * c;
+ __global half* restrict start_dst = dst + ow * oh * c;
+
+ // Map output row center to input coordinates (pixel-center convention).
+ float iy_f = (oy + 0.5) / factor - 0.5f;
+ int iy_i = ROUND(iy_f);
for (int ox = 0; ox < ow; ox++)
{
- float ix_r0 = ox*fx + fx / 2.0f - 0.5f;
- int ix_r1 = (int)(round(ix_r0));
+ float ix_f = (ox + 0.5) / factor - 0.5f;
+ int ix_i = ROUND(ix_f);
float4 v_sum = 0.f;
float4 v_wsum = 0.f;
- for (int y = max(iy_r1 - ry, 0);
- y <= min(iy_r1 + ry, (int)ih - 1); y++)
+ for (int y = max(iy_i - r, 0); y <= min(iy_i + r, (int)ih - 1); y++)
{
- float dy = iy_r0 - y;
- int x = max(ix_r1 - rx, 0);
- int end_x = min(ix_r1 + rx, (int)iw - 1);
+ float dy = iy_f - y;
+ int x = max(ix_i - r, 0);
+ int end_x = min(ix_i + r, (int)iw - 1);
float4 dx;
for (int i = 0; i < 4; i++)
- dx[i] = ix_r0 - x - i;
+ dx[i] = ix_f - x - i;
for (; x <= end_x - 3; x += 4, dx -= 4)
{
- float4 w = ax*triangleCoeff4(ax*dx) * ay*triangleCoeff(ay*dy);
+ float4 w = factor*triangleCoeff4(factor*dx) * factor*triangleCoeff(factor*dy);
float4 src_vec = { start_src[y*iw + x + 0],
start_src[y*iw + x + 1],
start_src[y*iw + x + 2],
for (; x <= end_x; x++)
{
- float dx = ix_r0 - x;
- float w = ax*triangleCoeff(ax*dx) * ay*triangleCoeff(ay*dy);
+ float dx = ix_f - x;
+ float w = factor*triangleCoeff(factor*dx) * factor*triangleCoeff(factor*dy);
v_sum[0] += w * start_src[y*iw + x];
v_wsum[0] += w;
start_dst[oy*ow + ox] = (!v_wsum[0]) ? (half)0.0f : (half)(v_sum[0] / v_wsum[0]);
}
}
+
+#endif
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Column tile width processed per work-group; also sizes the per-work-item
+// index/weight scratch arrays in ocl_st.
+#define MAX_WIDTH 512
+// Fully parenthesized and without a trailing semicolon so MIN() is safe inside
+// larger expressions; callers already supply their own `;`.
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+// DMA postwrite for ocl_st: copies the per-work-group `local_dst` tile
+// (C planes of get_local_size(1) rows x `length` columns) back into the
+// strided global `dst_data`. `src_data`/`theta` mirror the compute kernel's
+// argument list and are unused here.
+__kernel void __dma_postwrite_ocl_st(__global half const *const restrict src_data,
+ __global half const *const restrict theta,
+ __global half *const restrict dst_data,
+ int C,
+ int W,
+ __local half const *const restrict local_dst)
+{
+ // This group's column tile: [x0, x1) clamped to the row width W.
+ const int x0 = get_global_id(0) * MAX_WIDTH;
+ const int x1 = MIN(x0 + MAX_WIDTH, W);
+ const int length = x1 - x0;
+
+ WorkGroupDmaCreate3DTransaction(
+ local_dst, // src
+ dst_data + get_global_id(1) * W + x0, // dst
+ length * sizeof(half), // src width
+ length * sizeof(half), // dst width
+ length * sizeof(half), // src stride
+ W * sizeof(half), // dst stride
+ C, // num planes
+ get_local_size(1) * length * sizeof(half), // src plane stride
+ get_global_size(1) * W * sizeof(half), // dst plane stride
+ get_local_size(1) * length * sizeof(half), // plane size
+ 0);
+}
+
+// Computes bilinear sampling indices and weights for one output row of the
+// spatial transformer. theta[0..5] is read as a 2x3 affine transform
+// (presumably row-major; confirm against the host-side layout). For each of
+// the `length` columns starting at x0 it writes four corner weights into
+// `weight` and four flat source indices into `ind`, each plane `step` apart.
+// Out-of-range corners get index 0 and weight 0, so the gather stays in bounds.
+__attribute__((noinline))
+void calcInd(__global half const *const restrict theta,
+ half *const restrict weight,
+ int *const restrict ind,
+ int y, int H, int x0, int length, int step, int W)
+{
+ // Normalized row coordinate in [-1, 1).
+ float a = (float)y * 1.0f / H * 2 - 1;
+
+ int x = 0;
+
+ float8 va = (float8) {a, a, a, a, a, a, a, a};
+ float8 vxy = (float8) {x0 + 0, x0 + 1, x0 + 2, x0 + 3,
+ x0 + 4, x0 + 5, x0 + 6, x0 + 7};
+
+ // 8-wide main loop; the scalar loop below handles the tail.
+ for (; x <= length - 8; x += 8, vxy += 8)
+ {
+ float8 va1 = vxy * 1.0f / W * 2 - 1.f;
+
+ float8 vx = (va * theta[0] + va1 * theta[1] + theta[2] + 1.f) / 2.f * H;
+ float8 vy = (va * theta[3] + va1 * theta[4] + theta[5] + 1.f) / 2.f * W;
+
+ // floor() via truncation corrected for negatives (vector compares yield -1
+ // for true, hence the `& 1`).
+ const int8 ix = convert_int8(vx) - ((vx < 0) & 1);
+ const int8 iy = convert_int8(vy) - ((vy < 0) & 1);
+
+ float8 ax = vx - convert_float8(ix);
+ float8 ay = vy - convert_float8(iy);
+ float8 bx = 1.f - ax;
+ float8 by = 1.f - ay;
+
+ // Unsigned-compare trick: (uint)v < bound fuses the 0 <= v && v < bound
+ // check in one comparison (matches the two scalar tests in the tail loop).
+ union {int8 d; uint8 i; } check_x;
+
+ check_x.d = ix;
+ int8 b01 = check_x.i < (uint8)H;
+
+ check_x.d = ix + 1;
+ int8 b45 = check_x.i < (uint8)H;
+
+ union {int8 d; uint8 i; } check_y;
+
+ check_y.d = iy;
+ int8 b23 = check_y.i < (uint8)W;
+
+ check_y.d = iy + 1;
+ int8 b67 = check_y.i < (uint8)W;
+
+ int8 b0123 = b01 & b23;
+ int8 b0167 = b01 & b67;
+ int8 b4523 = b45 & b23;
+ int8 b4567 = b45 & b67;
+
+ // Flat indices of the four bilinear corners, forced to 0 when out of range.
+ int8 TL_id = ((ix + 0) * W + (iy + 0)) * (b0123 & 1);
+ int8 BL_id = ((ix + 1) * W + (iy + 0)) * (b4523 & 1);
+ int8 TR_id = ((ix + 0) * W + (iy + 1)) * (b0167 & 1);
+ int8 BR_id = ((ix + 1) * W + (iy + 1)) * (b4567 & 1);
+
+ // Masking the float bit pattern with the all-ones/all-zeros compare result
+ // zeroes the weight of any out-of-range corner.
+ union {float8 f; int8 i;} w0; w0.f = bx * by;
+ union {float8 f; int8 i;} w1; w1.f = ax * by;
+ union {float8 f; int8 i;} w2; w2.f = bx * ay;
+ union {float8 f; int8 i;} w3; w3.f = ax * ay;
+
+ w0.i = w0.i & b0123;
+ w1.i = w1.i & b4523;
+ w2.i = w2.i & b0167;
+ w3.i = w3.i & b4567;
+
+ *((half8*)(weight + x + 0*step)) = convert_half8(w0.f);
+ *((half8*)(weight + x + 1*step)) = convert_half8(w1.f);
+ *((half8*)(weight + x + 2*step)) = convert_half8(w2.f);
+ *((half8*)(weight + x + 3*step)) = convert_half8(w3.f);
+
+ *((int8*)(ind + x + 0*step)) = TL_id;
+ *((int8*)(ind + x + 1*step)) = BL_id;
+ *((int8*)(ind + x + 2*step)) = TR_id;
+ *((int8*)(ind + x + 3*step)) = BR_id;
+ }
+
+ // Scalar tail: same math as above, one column at a time.
+ for (; x < length; x++)
+ {
+ float a1 = (float)(x0 + x) * 1.0f / W * 2 - 1;
+
+ float fx = (a * theta[0] + a1 * theta[1] + theta[2] + 1)/2 * H;
+ float fy = (a * theta[3] + a1 * theta[4] + theta[5] + 1)/2 * W;
+
+ const int ix = (int)(fx) - (fx < 0);
+ const int iy = (int)(fy) - (fy < 0);
+
+ float ax = fx - ix;
+ float ay = fy - iy;
+ float bx = 1 - ax;
+ float by = 1 - ay;
+
+ int b0 = ix >= 0;
+ int b4 = ix >= -1;
+ int b1 = ix < H;
+ int b5 = ix < H-1;
+
+ int b2 = iy >= 0;
+ int b6 = iy >= -1;
+ int b3 = iy < W;
+ int b7 = iy < W-1;
+
+ int b01 = b0 & b1;
+ int b23 = b2 & b3;
+ int b45 = b4 & b5;
+ int b67 = b6 & b7;
+
+ int b0123 = b01 & b23;
+ int b0167 = b01 & b67;
+ int b4523 = b45 & b23;
+ int b4567 = b45 & b67;
+
+ int TL_id = ((ix + 0) * W + (iy + 0)) * b0123;
+ int BL_id = ((ix + 1) * W + (iy + 0)) * b4523;
+ int TR_id = ((ix + 0) * W + (iy + 1)) * b0167;
+ int BR_id = ((ix + 1) * W + (iy + 1)) * b4567;
+
+ half w0 = bx*by*b0123;
+ half w1 = ax*by*b4523;
+ half w2 = bx*ay*b0167;
+ half w3 = ax*ay*b4567;
+
+ weight[x + 0*step] = w0;
+ weight[x + 1*step] = w1;
+ weight[x + 2*step] = w2;
+ weight[x + 3*step] = w3;
+
+ ind[x + 0*step] = TL_id;
+ ind[x + 1*step] = BL_id;
+ ind[x + 2*step] = TR_id;
+ ind[x + 3*step] = BR_id;
+ }
+}
+
+// Gathers the four corner samples for each of `length` output pixels using the
+// indices/weights precomputed by calcInd (planes `step` apart) and writes the
+// bilinear blend to local memory. Out-of-range corners were given index 0 and
+// weight 0 upstream, so src[0] reads are harmless.
+__attribute__((noinline))
+void apply(__global half const *const restrict src,
+ half const *const restrict weight,
+ int const *const restrict ind,
+ __local half *const restrict dst,
+ int length,
+ int step)
+{
+ int x = 0;
+ // 8-wide gather + blend; scalar tail below.
+ for(; x <= length - 8; x += 8)
+ {
+ int8 TL_id = *((int8*)(ind + x + 0*step));
+ int8 BL_id = *((int8*)(ind + x + 1*step));
+ int8 TR_id = *((int8*)(ind + x + 2*step));
+ int8 BR_id = *((int8*)(ind + x + 3*step));
+
+ half8 w00 = *((half8*)(weight + x + 0*step));
+ half8 w01 = *((half8*)(weight + x + 1*step));
+ half8 w02 = *((half8*)(weight + x + 2*step));
+ half8 w03 = *((half8*)(weight + x + 3*step));
+
+ half8 TL = (half8){src[TL_id[0]], src[TL_id[1]], src[TL_id[2]], src[TL_id[3]],
+ src[TL_id[4]], src[TL_id[5]], src[TL_id[6]], src[TL_id[7]]};
+ half8 TR = (half8){src[TR_id[0]], src[TR_id[1]], src[TR_id[2]], src[TR_id[3]],
+ src[TR_id[4]], src[TR_id[5]], src[TR_id[6]], src[TR_id[7]]};
+ half8 BL = (half8){src[BL_id[0]], src[BL_id[1]], src[BL_id[2]], src[BL_id[3]],
+ src[BL_id[4]], src[BL_id[5]], src[BL_id[6]], src[BL_id[7]]};
+ half8 BR = (half8){src[BR_id[0]], src[BR_id[1]], src[BR_id[2]], src[BR_id[3]],
+ src[BR_id[4]], src[BR_id[5]], src[BR_id[6]], src[BR_id[7]]};
+
+ half8 res = w00 * TL + w01 * BL + w02 * TR + w03 * BR;
+
+ *((__local half8*)(dst + x)) = res;
+ }
+
+ for (; x < length; x++)
+ {
+ int TL_id = ind[x + 0*step];
+ int BL_id = ind[x + 1*step];
+ int TR_id = ind[x + 2*step];
+ int BR_id = ind[x + 3*step];
+
+ half w00 = weight[x + 0*step];
+ half w01 = weight[x + 1*step];
+ half w02 = weight[x + 2*step];
+ half w03 = weight[x + 3*step];
+
+ half TL = src[TL_id];
+ half TR = src[TR_id];
+ half BL = src[BL_id];
+ half BR = src[BR_id];
+
+ half res = w00 * TL + w01 * BL + w02 * TR + w03 * BR;
+ dst[x] = res;
+ }
+}
+
+// Spatial-transformer compute kernel: each work-item handles one output row
+// (y) of one column tile (group id 0), computing sampling indices/weights once
+// and reusing them across all C channels. Results land in local_dst; the
+// postwrite kernel flushes them to dst_data.
+// NOTE(review): dst_data is const-qualified here although it is the kernel's
+// output (it is only written via the postwrite kernel) — confirm intentional.
+__kernel void ocl_st(__global half const *const restrict src_data,
+ __global half const *const restrict theta,
+ __global half const *const restrict dst_data,
+ int C,
+ int W,
+ __local half *const restrict local_dst)
+{
+ int w = get_group_id(0);
+
+ int y = get_global_id(1);
+ int H = get_global_size(1);
+
+ // Per-work-item scratch: 4 corner planes of MAX_WIDTH entries each.
+ // NOTE(review): these 2D arrays decay to `int(*)[MAX_WIDTH]` when passed to
+ // calcInd/apply, which take flat pointers — works because the rows are
+ // contiguous with stride MAX_WIDTH, but expect a compiler warning.
+ __private int ind[4][MAX_WIDTH] __attribute__((aligned(16)));
+ __private half weight[4][MAX_WIDTH] __attribute__((aligned(16)));
+
+ const int x0 = w * MAX_WIDTH;
+ const int x1 = MIN(x0 + MAX_WIDTH, W);
+ const int length = x1 - x0;
+
+ calcInd(theta, weight, ind, y, H, x0, length, MAX_WIDTH, W);
+
+ for (int c = 0; c < C; c++)
+ {
+ __global half const *const restrict src = src_data + c*H*W;
+ __local half *const restrict dst = local_dst + c*get_local_size(1)*length + get_local_id(1)*length;
+
+ apply(src, weight, ind, dst, length, MAX_WIDTH);
+ }
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <pugixml.hpp>
+#include <ie_common.h>
+
+#include <vpu/utils/enums.hpp>
+#include <vpu/utils/small_vector.hpp>
+
+namespace vpu {
+
+namespace ie = InferenceEngine;
+
+// Kinds of arguments a custom OpenCL kernel can bind (parsed from the layer's
+// XML config).
+VPU_DECLARE_ENUM(CustomParamType,
+ Input,
+ Output,
+ Data,
+ LocalData,
+ InputBuffer,
+ OutputBuffer,
+ Int,
+ Float)
+
+// Tensor layouts a kernel argument can request. Note the numbering differs
+// from the legacy enum in custom_layer.hpp (Any/None shifted by the new
+// YXF/FYX/BF entries).
+VPU_DECLARE_ENUM(CustomDataFormat,
+ BYXF = 0, // NHWC used in most software layers
+ BFYX = 1, // NCHW used if HW module is enabled
+ YXF = 2, // HWC used in most software layers
+ FYX = 3, // CHW used if HW module is enabled
+ BF = 4, // NC layout
+ Any = 5, // doesn't really matter
+ None = 6)
+
+// Which tensor (input or output) supplies work-group dimensions.
+VPU_DECLARE_ENUM(CustomDimSource, Input, Output)
+
+// One compiled OpenCL kernel of a custom layer: its binary, argument bindings
+// and work-size rules, all loaded from the layer's XML configuration.
+struct CustomKernel final {
+ // Binding of a single kernel argument as declared in the XML <Parameters>
+ // node.
+ struct KernelParam final {
+ CustomParamType type = CustomParamType::Input;
+ CustomDataFormat format = CustomDataFormat::Any;
+ std::string argName; // argument name in the OpenCL source
+ int portIndex = -1; // layer port this argument maps to, if any
+ std::string irSource; // IR attribute the value comes from, if any
+ std::string bufferSizeRule; // size expression for buffer arguments
+ CustomDimSource dimSource;
+ int dimIdx = -1;
+ };
+
+private:
+ std::string _configDir;
+ int _maxShaves = 0;
+ std::string _kernelBinary; // concatenated contents of all <Source> files
+ SmallVector<KernelParam> _kernelParams;
+ SmallVector<std::string> _globalGridSizeRules;
+ SmallVector<std::string> _localGridSizeRules;
+ SmallVector<std::string> _parameters; // argument names from the ELF metadata
+ int _kernelId = 0;
+
+ CustomDimSource _wgDimSource = CustomDimSource::Input;
+ int _wgDimIdx = -1;
+
+ int _inputDataCount = 0;
+
+public:
+ explicit CustomKernel(const pugi::xml_node& node, std::string configDir);
+
+ void processParametersNode(const pugi::xml_node& node);
+ void processWorkSizesNode(const pugi::xml_node& node);
+
+ // Accessors return by value; cheap for ints/strings, but note the
+ // SmallVector getters copy their contents on every call.
+ int maxShaves() const { return _maxShaves; }
+ const std::string& kernelBinary() const { return _kernelBinary; }
+ SmallVector<KernelParam> bindings() const { return _kernelParams; }
+ SmallVector<std::string> globalGridSizeRules() const { return _globalGridSizeRules; }
+ SmallVector<std::string> localGridSizeRules() const { return _localGridSizeRules; }
+ SmallVector<std::string> parameters() const { return _parameters; }
+ int kernelId() const { return _kernelId; }
+ CustomDimSource dimSource() const { return _wgDimSource; }
+ int dimSourceIndex() const { return _wgDimIdx; }
+ int inputDataCount() const { return _inputDataCount; }
+};
+
+} // namespace vpu
#include <functional>
#include <details/caseless.hpp>
-
#include <pugixml.hpp>
#include <vpu/utils/enums.hpp>
#include <vpu/utils/small_vector.hpp>
+#include <vpu/frontend/custom_kernel.hpp>
+
+#include <ie_common.h>
namespace vpu {
namespace ie = InferenceEngine;
-VPU_DECLARE_ENUM(CustomDataFormat,
- BYXF = 0, // NHWC used in most software layers
- BFYX = 1, // NCHW used if HW module is enabled
- YXF = 2, // HWC used in most software layers
- FYX = 3, // CHW used if HW module is enabled
- Any = 4, // doesn't really matter
- None = 5
-)
-
-VPU_DECLARE_ENUM(CustomParamType,
- Input,
- Output,
- Data,
- LocalData,
- InputBuffer,
- OutputBuffer,
- Int,
- Float
-)
-
-VPU_DECLARE_ENUM(CustomDimSource,
- Input,
- Output
-)
-
class CustomLayer final {
public:
using Ptr = std::shared_ptr<CustomLayer>;
+ // Refactor: per-kernel state (binary, bindings, work sizes) moved out into
+ // CustomKernel; a CustomLayer now owns a sequence of kernels plus the
+ // layer-level "where" restrictions and port-format requirements.
+ explicit CustomLayer(std::string configDir, const pugi::xml_node& customLayer);
- struct KernelParam final {
- CustomParamType type = CustomParamType::Input;
- CustomDataFormat format = CustomDataFormat::Any;
- std::string argName;
- int portIndex = -1;
- std::string irSource;
- SmallVector<std::string> bufferSizeRules;
- CustomDimSource dimSource;
- int dimIdx = -1;
- };
+ std::vector<CustomKernel> kernels() const { return _kernels; }
+ std::string layerName() const { return _layerName; }
+ std::map<int, CustomDataFormat> inputs() { return _inputs; }
+ std::map<int, CustomDataFormat> outputs() { return _outputs; }
static ie::details::caseless_map<std::string, std::vector<CustomLayer::Ptr>> loadFromFile(
const std::string& configFile,
bool canBeMissed = false);
- const std::string& kernelBinary() const { return _kernelBinary; }
-
- void setStageNumInputs(int id);
- int stageNumInputs() const;
- uint32_t kernelAddress(int idx = 1) const;
- int kernelId() const;
- int maxShaves() const;
- const std::map<std::string, std::string>& whereParams() const { return _whereParams; }
-
- const SmallVector<KernelParam>& bindings() const { return _kernelParams; }
- const SmallVector<std::string>& parameters() const { return _parameters; }
-
- const SmallVector<std::string>& globalSizeRules() const { return _globalSizeRules; }
- const SmallVector<std::string>& localSizeRules() const { return _localSizeRules; }
-
- CustomDimSource dimSource() const { return _wgDimSource; }
- int dimSourceIndex() const { return _wgDimIdx; }
-
-private:
- explicit CustomLayer(const std::string& dirname) : _configDir(dirname) {}
-
- void loadSingleLayer(const pugi::xml_node& node);
- void processWhere(const pugi::xml_node& node);
- void processKernelNode(const pugi::xml_node& node);
- void processParametersNode(const pugi::xml_node& node);
- void processWorkSizesNode(const pugi::xml_node& node);
-
- static bool isLegalSizeRule(const std::string& rule);
- static CustomDataFormat formatFromString(const std::string& str);
+ // True when the layer's IR params satisfy this definition's <Where> clause.
+ bool meetsWhereRestrictions(const std::map<std::string, std::string>& params) const;
+ static bool isLegalSizeRule(const std::string& rule, std::map<std::string, std::string> layerParams);
+ static CustomDataFormat formatFromLayout(const InferenceEngine::Layout& layout);
private:
std::string _configDir;
std::string _layerName;
- std::string _kernelEntry;
- std::string _kernelBinary;
std::map<std::string, std::string> _whereParams;
- int _maxShaves = 0;
- int _stageNumInputs = -1;
-
- SmallVector<KernelParam> _kernelParams;
- SmallVector<std::string> _globalSizeRules;
- SmallVector<std::string> _localSizeRules;
- SmallVector<std::string> _parameters;
-
- std::map<uint32_t, uint32_t, std::greater<uint32_t>> _kernelAddress;
+ std::vector<CustomKernel> _kernels;
- CustomDimSource _wgDimSource = CustomDimSource::Input;
- int _wgDimIdx = -1;
+ std::map<int, CustomDataFormat> _inputs; // required format per input port
+ std::map<int, CustomDataFormat> _outputs; // required format per output port
};
}; // namespace vpu
void parseExpTopKROIs(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
void parseNonZero(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
void parseROIAlign(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
+ void parseOutShapeOfReshape(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
+ void parseBroadcast(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
//
// Special layers
void parseLSTMCell(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
void parseTensorIterator(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
-//
-// Utility
-//
+ //
+ // Utility
+ //
+
+ static CustomLayer::Ptr getSuitableCustomLayer(const std::vector<CustomLayer::Ptr>& customLayers, const ie::CNNLayerPtr&cnnLayer);
private:
Data getVpuData(const ie::DataPtr& ieData) const;
bool dumpAllPasses;
bool disableReorder = false; // TODO: rename to enableReorder and switch logic.
+ bool disableConvertStages = false;
bool enablePermuteMerging = true;
bool enableReplWithSCRelu = false;
bool enableReplaceWithReduceMean = true;
Pass::Ptr splitHwDepthConv();
Pass::Ptr splitHwConvAndPool();
Pass::Ptr hwPadding();
+ Pass::Ptr splitLargeKernelConv();
//
// Batch support
void serializeDescImpl(
BlobSerializer& serializer,
const DataDesc& storedDesc,
- const DimValues& storedStrides) const;
+ const ShapeLocation& shapeLocation) const;
private:
inline DataNode() :
friend ModelObj;
};
- inline DataToDataEdgeHelper connectDataWithData() {
- return DataToDataEdgeHelper(this);
- }
-
DataToShapeAllocation connectDataWithShape(
const Data& parent,
const Data& child);
- void replaceParentData(
+ void replaceDataToShapeParent(
+ const DataToShapeAllocation& edge,
+ const Data& newParent);
+ void replaceDataToShapeChild(
+ const DataToShapeAllocation& edge,
+ const Data& newChild);
+
+ inline DataToDataEdgeHelper connectDataWithData() {
+ return DataToDataEdgeHelper(this);
+ }
+
+ void replaceDataToDataParent(
const DataToDataAllocation& edge,
const Data& newParent);
- void replaceChildData(
+ void replaceDataToDataChild(
const DataToDataAllocation& edge,
const Data& newChild);
StubPriorBox,
StubPriorBoxClustered,
- Concat,
+ StubConcat,
Split,
Reshape,
Expand,
Pad = 71,
Resample = 72,
Upsampling = 73,
- ArgMax = 74,
Div = 75,
Min = 76,
Squared_diff = 77,
ExpGenerateProposals = 124,
ExpTopKROIs = 125,
ScatterElementsUpdate = 126,
+ OutShapeOfReshape = 127,
+ Concat = 128,
+ Broadcast = 129,
)
//
IndexOnly = 2)
//
+// ConcatInferRequirement
+//
+
+// Requirement whether to infer Concat stage on the device side
+VPU_DECLARE_ENUM(ConcatInferRequirement,
+ NeedToInfer = 0,
+ CanBeReplaced = 1)
+
+//
+// BroadcastMode
+//
+
+// Modes for Broadcast operation according to specification
+VPU_DECLARE_ENUM(BroadcastMode,
+ NUMPY = 0,
+ EXPLICIT = 1)
+
+//
// StageDataInfo
//
DECLARE_VPU_CONFIG_KEY(DISABLE_REORDER);
/**
+ * @brief Used to disable convert stages in tests to be able to insert
+ * convert layer with desired precision.
+ */
+DECLARE_VPU_CONFIG_KEY(DISABLE_CONVERT_STAGES);
+
+/**
* @brief Used to disable permute merging pass (with setting "NO") in tests to check it preserves behaviour. Default = "YES"
*/
DECLARE_VPU_CONFIG_KEY(ENABLE_PERMUTE_MERGING);
const ie::CNNLayerPtr& layer,
Dim axis,
const DataVector& inputs,
- const Data& output);
+ const Data& output,
+ ConcatInferRequirement inferRequirement = ConcatInferRequirement::CanBeReplaced);
Stage addConcatStage(
const Model& model,
+// Serializes each data object's dims/strides (in stored-permutation order)
+// into the blob's const-data section. New behavior: when the shape lives
+// outside the blob but the data is a network Output, its upper-bound
+// dims/strides are still written at the offsets recorded in the data's attrs.
void BackEnd::serializeConstShapes(const Model& model, const mv_blob_header& blobHdr, std::vector<char>& blob) {
for (const auto& data : model->datas()) {
- const auto serializeToBlob = [&data, &blob, &blobHdr](const BlobSerializer& serializer, int offset) {
- std::copy_n(serializer.data(), data->desc().numDims() * sizeof(uint32_t), blob.data() + blobHdr.const_data_section_offset + offset);
- };
-
const auto dimsOrder = data->desc().dimsOrder();
const auto storedPerm = dimsOrder.toPermutation();
- const auto shapeLocation = data->shapeLocation();
-
- if (shapeLocation.dimsLocation == Location::Blob) {
- BlobSerializer dimsSerializer;
- const auto dims = data->desc().dims();
+ // The lambda now takes the raw DimValues and does the permuted append
+ // itself, so dims and strides share one serialization path.
+ const auto serializeToBlob = [&data, &blob, &blobHdr, &storedPerm](const DimValues& values, int offset) {
+ BlobSerializer serializer;
for (const auto& d : storedPerm) {
- dimsSerializer.append(checked_cast<uint32_t>(dims[d]));
+ serializer.append(checked_cast<uint32_t>(values[d]));
}
- serializeToBlob(dimsSerializer, shapeLocation.dimsOffset);
+
+ std::copy_n(serializer.data(), data->desc().numDims() * sizeof(uint32_t), blob.data() + blobHdr.const_data_section_offset + offset);
+ };
+
+ const auto shapeLocation = data->shapeLocation();
+
+ if (shapeLocation.dimsLocation == Location::Blob) {
+ serializeToBlob(data->desc().dims(), shapeLocation.dimsOffset);
+ } else if (data->usage() == DataUsage::Output) {
+ // NOTE(review): attrs().get<int> presumably throws/asserts when the
+ // attribute is absent — every dynamic Output must carry these offsets.
+ auto ioDimsUpperBoundOffset = data->attrs().get<int>("ioDimsUpperBoundOffset");
+ serializeToBlob(data->desc().dims(), ioDimsUpperBoundOffset);
}
if (shapeLocation.stridesLocation == Location::Blob) {
- BlobSerializer stridesSerializer;
- const auto strides = data->strides();
-
- for (const auto& d : storedPerm) {
- stridesSerializer.append(checked_cast<uint32_t>(strides[d]));
- }
- serializeToBlob(stridesSerializer, shapeLocation.stridesOffset);
+ serializeToBlob(data->strides(), shapeLocation.stridesOffset);
+ } else if (data->usage() == DataUsage::Output) {
+ auto ioStridesUpperBoundOffset = data->attrs().get<int>("ioStridesUpperBoundOffset");
+ serializeToBlob(data->strides(), ioStridesUpperBoundOffset);
}
}
}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/frontend/custom_kernel.hpp>
+#include <xml_parse_utils.h>
+#include <details/caseless.hpp>
+#include <vpu/utils/extra.hpp>
+
+namespace vpu {
+
+// Minimal packed views of the ELF32 object format, just enough to walk the
+// section/symbol tables of the compiled kernel binary. The offsN fields skip
+// header members this parser does not read.
+VPU_PACKED(Elf32Ehdr {
+ uint8_t offs1[28];
+ uint32_t ePhoff; // Program header offset
+ uint32_t eShoff; // Section header offset
+ uint8_t offs2[12];
+ uint16_t eShnum; // Number of sections
+ uint16_t offs3;
+};)
+
+VPU_PACKED(Elf32Section {
+ uint32_t shName;
+ uint32_t shType;
+ uint32_t shFlags;
+ uint32_t shAddr;
+ uint32_t shOffset;
+ uint32_t shSize;
+ uint32_t shLink;
+ uint32_t shInfo;
+ uint32_t shAddralign;
+ uint32_t shEntsize;
+};)
+
+VPU_PACKED(Elf32Phdr {
+ uint32_t pType; // Identifies program segment type
+ uint32_t pOffset; // Segment file offset
+ uint32_t pVaddr; // Segment virtual address
+ uint32_t pPaddr; // Segment physical address
+ uint32_t pFilesz; // Segment size in file
+ uint32_t pMemsz; // Segment size in memory
+ uint32_t pFlags; // Flags position from ELF standard spec
+ uint32_t pAlign; // Segment alignment, file & memory
+};)
+
+VPU_PACKED(Elf32Sym {
+ uint32_t stName;
+ uint32_t stValue;
+ uint32_t stSize;
+ uint8_t stInfo;
+ uint8_t stOther;
+ uint16_t stShndx;
+};)
+
+// Tool-specific metadata emitted by the kernel compiler (not standard ELF).
+VPU_PACKED(KernelHdr {
+ uint32_t address; // Kernel address
+ uint32_t flags; // Should be 0 for now
+ uint32_t sectionSize; // Section size, offset to the next kernel
+ uint32_t argOffset; // offset to arguments
+ uint32_t stackSize; // Size of the stack required for kernel
+ uint32_t stackSizeWI; // Size of the stack required for kernel per WI
+};)
+
+VPU_PACKED(KernelArgHdr {
+ uint32_t stringOffset;
+ uint32_t addressSpace;
+ uint32_t typeOffset;
+ uint32_t size;
+ uint32_t laneSize;
+};)
+
+// Locates the first string table and first symbol table in the ELF image.
+// Returns {string section, symbol section}; asserts if either is missing.
+// NOTE(review): assumes the first STRTAB encountered is the one SYMTAB links
+// to, rather than following symShdr->shLink — holds for this compiler's
+// output, but is not general ELF handling.
+std::pair<const Elf32Section*, const Elf32Section*> findSymbolTable(
+ const char* ELFData) {
+ const uint32_t SYMTAB = 2; // Link editing symbol table
+ const uint32_t STRTAB = 3; // A string table
+
+ IE_ASSERT(ELFData != nullptr);
+
+ auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
+ auto shdr = reinterpret_cast<const Elf32Section*>(ELFData + ehdr->eShoff);
+
+ const Elf32Section* strShdr = nullptr;
+ const Elf32Section* symShdr = nullptr;
+ for (size_t i = 0; i < ehdr->eShnum; i++) {
+ if (shdr[i].shType == STRTAB && strShdr == nullptr) {
+ strShdr = &shdr[i];
+ } else if (shdr[i].shType == SYMTAB && symShdr == nullptr) {
+ symShdr = &shdr[i];
+ }
+
+ if (symShdr != nullptr && strShdr != nullptr)
+ break;
+ }
+ IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
+
+ return std::make_pair(strShdr, symShdr);
+}
+
+// Reads the compiler-emitted "opencl.kernelArgs" metadata sections and returns
+// the argument-name strings of the kernel whose entry point (relative to the
+// first program segment's virtual address) equals kernelAddress. Empty result
+// if no kernel record matches.
+// NOTE(review): the symbol-scan boilerplate here is duplicated almost verbatim
+// in getKernelId below — a shared helper returning (kernel index, KernelHdr*)
+// would remove the copy, but cannot be introduced inside this patch hunk.
+SmallVector<std::string> deduceKernelParameters(
+ const char* ELFData,
+ uint32_t kernelAddress) {
+ IE_ASSERT(ELFData != nullptr);
+ const auto cmp = ie::details::CaselessEq<std::string>{};
+
+ auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
+ auto phdr = reinterpret_cast<const Elf32Phdr*>(ELFData + ehdr->ePhoff);
+ auto shdr = reinterpret_cast<const Elf32Section*>(ELFData + ehdr->eShoff);
+
+ const Elf32Section* strShdr = nullptr;
+ const Elf32Section* symShdr = nullptr;
+ std::tie(strShdr, symShdr) = findSymbolTable(ELFData);
+ IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
+
+ auto numSymEntries = symShdr->shSize / symShdr->shEntsize;
+ auto sym = reinterpret_cast<const Elf32Sym*>(ELFData + symShdr->shOffset);
+ auto firstStr = ELFData + strShdr->shOffset;
+
+ // Section holding the NUL-separated argument name/type strings.
+ const char* kernelArgStrings = nullptr;
+ for (size_t i = 0; i < numSymEntries; i++) {
+ if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.strings")) {
+ kernelArgStrings = ELFData + shdr[sym[i].stShndx].shOffset;
+ break;
+ }
+ }
+ IE_ASSERT(kernelArgStrings != nullptr);
+
+ SmallVector<std::string> parameters;
+ for (size_t i = 0; i < numSymEntries; i++) {
+ if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.info")) {
+ // Layout: int kernel count, then one variable-size record per kernel.
+ auto ptr = ELFData + shdr[sym[i].stShndx].shOffset;
+ auto numKernels = *reinterpret_cast<const int*>(ptr);
+
+ auto metaOffset = sizeof(int);
+ for (int k = 0; k < numKernels; k++) {
+ auto kHdr = reinterpret_cast<const KernelHdr*>(ptr + metaOffset);
+
+ if (kHdr->address-phdr->pVaddr == kernelAddress) {
+ // argOffset is relative to its own field; the int immediately
+ // before the first KernelArgHdr is the argument count.
+ auto aHdr = reinterpret_cast<const KernelArgHdr*>(
+ reinterpret_cast<const char*>(&(kHdr->argOffset)) + sizeof(kHdr->argOffset) + kHdr->argOffset);
+
+ auto numArgs = reinterpret_cast<const int*>(aHdr)[-1];
+ for (int n = 0; n < numArgs; n++, aHdr++) {
+ parameters.push_back(kernelArgStrings + aHdr->stringOffset);
+ }
+
+ break;
+ }
+
+ metaOffset += kHdr->sectionSize + sizeof(kHdr->address) + sizeof(kHdr->flags);
+ }
+ }
+ }
+
+ return parameters;
+}
+
+// Returns the zero-based index of the kernel whose entry address (relative to
+// the first program segment's pVaddr) equals kernelAddress, or -1 when no
+// metadata record matches. Walks the same "opencl.kernelArgs" sections as
+// deduceKernelParameters (see the duplication note there).
+int32_t getKernelId(
+ const char* ELFData,
+ uint32_t kernelAddress) {
+ IE_ASSERT(ELFData != nullptr);
+ const auto cmp = ie::details::CaselessEq<std::string>{};
+
+ auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
+ auto phdr = reinterpret_cast<const Elf32Phdr*>(ELFData + ehdr->ePhoff);
+ auto shdr = reinterpret_cast<const Elf32Section*>(ELFData + ehdr->eShoff);
+
+ const Elf32Section* strShdr = nullptr;
+ const Elf32Section* symShdr = nullptr;
+ std::tie(strShdr, symShdr) = findSymbolTable(ELFData);
+ IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
+
+ auto numSymEntries = symShdr->shSize / symShdr->shEntsize;
+ auto sym = reinterpret_cast<const Elf32Sym*>(ELFData + symShdr->shOffset);
+ auto firstStr = ELFData + strShdr->shOffset;
+
+ // NOTE(review): kernelArgStrings is located but never read in this
+ // function — the lookup (and its assert) could be dropped here.
+ const char* kernelArgStrings = nullptr;
+ for (size_t i = 0; i < numSymEntries; i++) {
+ if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.strings")) {
+ kernelArgStrings = ELFData + shdr[sym[i].stShndx].shOffset;
+ break;
+ }
+ }
+ IE_ASSERT(kernelArgStrings != nullptr);
+
+ for (size_t i = 0; i < numSymEntries; i++) {
+ if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.info")) {
+ auto ptr = ELFData + shdr[sym[i].stShndx].shOffset;
+ auto numKernels = *reinterpret_cast<const int*>(ptr);
+
+ auto metaOffset = sizeof(int);
+ for (int k = 0; k < numKernels; k++) {
+ auto kHdr = reinterpret_cast<const KernelHdr*>(ptr + metaOffset);
+
+ if (kHdr->address-phdr->pVaddr == kernelAddress) {
+ return k;
+ }
+
+ metaOffset += kHdr->sectionSize + sizeof(kHdr->address) + sizeof(kHdr->flags);
+ }
+ }
+ }
+
+ return -1;
+}
+
+// Resolves a kernel's entry-point offset (symbol value minus the first
+// program segment's pVaddr) by symbol name; throws when the symbol is absent.
+// NOTE(review): symbol names are matched case-insensitively (CaselessEq),
+// which is unusual for ELF symbols — confirm this is intended rather than a
+// convenience carried over from the XML parsing code.
+uint32_t getKernelEntry(const char* ELFData, const std::string& kernelName) {
+ IE_ASSERT(ELFData != nullptr);
+ const auto cmp = ie::details::CaselessEq<std::string>{};
+
+ auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
+ auto phdr = reinterpret_cast<const Elf32Phdr*>(ELFData + ehdr->ePhoff);
+
+ const Elf32Section* strShdr = nullptr;
+ const Elf32Section* symShdr = nullptr;
+ std::tie(strShdr, symShdr) = findSymbolTable(ELFData);
+ IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
+
+ auto numSymEntries = symShdr->shSize / symShdr->shEntsize;
+ auto sym = reinterpret_cast<const Elf32Sym*>(ELFData + symShdr->shOffset);
+ auto firstStr = ELFData + strShdr->shOffset;
+
+ for (size_t i = 0; i < numSymEntries; i++) {
+ if (cmp(firstStr + sym[i].stName, kernelName)) {
+ return sym[i].stValue - phdr->pVaddr;
+ }
+ }
+
+ THROW_IE_EXCEPTION << "Cannot find kernel entry point for custom kernel " << kernelName;
+}
+
+// Builds a CustomKernel from its XML <Kernel> node: concatenates all <Source>
+// binaries, resolves the entry point named by the "entry" attribute in the
+// resulting ELF, then parses parameter bindings and work-size rules.
+CustomKernel::CustomKernel(const pugi::xml_node& kernel, std::string configDir): _configDir {std::move(configDir)} {
+ _maxShaves = XMLParseUtils::GetIntAttr(kernel, "max-shaves", 0);
+
+ for (auto source = kernel.child("Source"); !source.empty(); source = source.next_sibling("Source")) {
+ auto fileName = _configDir + "/" + XMLParseUtils::GetStrAttr(source, "filename", "");
+
+ std::ifstream inputFile(fileName, std::ios::binary);
+ if (!inputFile.is_open()) {
+ THROW_IE_EXCEPTION << "Couldn't open kernel file " << fileName;
+ }
+
+ std::ostringstream contentStream;
+ contentStream << inputFile.rdbuf();
+ _kernelBinary.append(contentStream.str());
+ }
+
+ // NOTE(review): &_kernelBinary[0] is undefined for an empty string —
+ // a <Kernel> with no <Source> children would reach here; consider guarding.
+ const auto kernelEntryName = XMLParseUtils::GetStrAttr(kernel, "entry");
+ const auto kernelEntry = getKernelEntry(&_kernelBinary[0], kernelEntryName);
+ _parameters = deduceKernelParameters(&_kernelBinary[0], kernelEntry);
+ _kernelId = getKernelId(&_kernelBinary[0], kernelEntry);
+
+ processParametersNode(kernel);
+ processWorkSizesNode(kernel);
+
+ // Count arguments fed by network inputs/data so stages know how many
+ // inputs precede the outputs in the binding list.
+ const auto isInputData = [&](const CustomKernel::KernelParam& param) {
+ return param.type == CustomParamType::Input || param.type == CustomParamType::InputBuffer ||
+ param.type == CustomParamType::Data;
+ };
+
+ _inputDataCount = std::count_if(begin(_kernelParams), end(_kernelParams), isInputData);
+}
+
+std::pair<CustomDimSource, int> parseDimSource(const std::string& dims) {
+ const auto cmp = ie::details::CaselessEq<std::string>{};
+ const auto pos = dims.find_first_of(',');
+ const auto source = dims.substr(0, pos);
+ const auto dimSource = [&] {
+ if (cmp(source, "input")) {
+ return CustomDimSource::Input;
+ } else if (cmp(source, "output")) {
+ return CustomDimSource::Output;
+ } else {
+ THROW_IE_EXCEPTION << "Invalid dim source argument" << source;
+ }
+ }();
+
+ const auto idx = [&] {
+ if (pos == std::string::npos) {
+ return -1;
+ }
+ const auto idxString = dims.substr(pos + 1, std::string::npos);
+ return std::stoi(idxString);
+ }();
+
+ return std::make_pair(dimSource, idx);
+}
+
+
+CustomDataFormat formatFromString(const std::string& str) {
+ static const ie::details::caseless_map<std::string, CustomDataFormat> FormatNameToType = {
+ { "BFYX" , CustomDataFormat::BFYX },
+ { "BYXF" , CustomDataFormat::BYXF },
+ { "FYX" , CustomDataFormat::FYX },
+ { "YXF" , CustomDataFormat::YXF },
+ { "BF" , CustomDataFormat::BF },
+ { "ANY" , CustomDataFormat::Any }
+ };
+
+ auto it = FormatNameToType.find(str);
+ if (it != FormatNameToType.end()) {
+ return it->second;
+ }
+
+ THROW_IE_EXCEPTION << "Tensor node has an invalid format '" << str << "'";
+}
+
+SmallVector<std::string> parseSizeRule(const std::string& size) {
+ auto result = SmallVector<std::string>();
+ result.reserve(std::count(begin(size), end(size), ',') + 1);
+ std::stringstream sizeRules{size};
+ std::string bufferSize;
+
+ while (std::getline(sizeRules, bufferSize, ',')) {
+ result.push_back(bufferSize);
+ }
+
+ return result;
+}
+
+void CustomKernel::processParametersNode(const pugi::xml_node& node) {
+ const auto cmp = ie::details::CaselessEq<std::string> {};
+ const auto parameters = node.child("Parameters");
+
+ for (auto tensor = parameters.child("Tensor"); !tensor.empty(); tensor = tensor.next_sibling("Tensor")) {
+ KernelParam kp;
+
+ auto typeStr = XMLParseUtils::GetStrAttr(tensor, "type");
+ if (cmp(typeStr, "input")) {
+ kp.type = CustomParamType::Input;
+ } else if (cmp(typeStr, "output")) {
+ kp.type = CustomParamType::Output;
+ } else if (cmp(typeStr, "input_buffer")) {
+ kp.type = CustomParamType::InputBuffer;
+ } else if (cmp(typeStr, "output_buffer")) {
+ kp.type = CustomParamType::OutputBuffer;
+ } else if (cmp(typeStr, "data")) {
+ kp.type = CustomParamType::Data;
+ } else {
+ THROW_IE_EXCEPTION << "Tensor node has an invalid type '" << typeStr << "'";
+ }
+
+ if (kp.type == CustomParamType::InputBuffer || kp.type == CustomParamType::OutputBuffer) {
+ const auto sizeRule = XMLParseUtils::GetStrAttr(tensor, "size");
+ kp.bufferSizeRule = parseSizeRule(sizeRule)[0];
+
+ const auto dimString = XMLParseUtils::GetStrAttr(tensor, "dim");
+ std::tie(kp.dimSource, kp.dimIdx) = parseDimSource(dimString);
+ }
+
+ kp.format = formatFromString(XMLParseUtils::GetStrAttr(tensor, "format", "BFYX"));
+ kp.argName = XMLParseUtils::GetStrAttr(tensor, "arg-name");
+ kp.portIndex = XMLParseUtils::GetIntAttr(tensor, "port-index");
+
+ _kernelParams.push_back(std::move(kp));
+ }
+
+ for (auto data = parameters.child("Data"); !data.empty(); data = data.next_sibling("Data")) {
+ KernelParam kp;
+
+ auto typeStr = XMLParseUtils::GetStrAttr(data, "type");
+ if (cmp(typeStr, "data")) {
+ kp.type = CustomParamType::Data;
+ } else if (cmp(typeStr, "local_data")) {
+ kp.type = CustomParamType::LocalData;
+ } else {
+ THROW_IE_EXCEPTION << "Data node has an invalid type '" << typeStr << "'";
+ }
+
+ kp.argName = XMLParseUtils::GetStrAttr(data, "arg-name");
+
+ kp.irSource = XMLParseUtils::GetStrAttr(data, "source", "");
+ const auto dimString = XMLParseUtils::GetStrAttr(data, "dim", "");
+
+ if (kp.irSource.empty() && dimString.empty()) {
+ THROW_IE_EXCEPTION << "Data node has no source or dim";
+ }
+
+ if (!kp.irSource.empty() && !dimString.empty()) {
+ THROW_IE_EXCEPTION << "Data node can only have source or dim";
+ }
+
+ if (kp.type == CustomParamType::LocalData) {
+ const auto bufferSize = XMLParseUtils::GetStrAttr(data, "size", "");
+ kp.bufferSizeRule = bufferSize;
+
+ if (!dimString.empty()) {
+ std::tie(kp.dimSource, kp.dimIdx) = parseDimSource(dimString);
+ }
+ }
+
+ _kernelParams.push_back(std::move(kp));
+ }
+
+ for (auto scalar = parameters.child("Scalar"); !scalar.empty(); scalar = scalar.next_sibling("Scalar")) {
+ KernelParam kp;
+
+ const auto type = XMLParseUtils::GetStrAttr(scalar, "type");
+ if (cmp(type, "int")) {
+ kp.type = CustomParamType::Int;
+ } else if (cmp(type, "float")) {
+ kp.type = CustomParamType::Float;
+ } else {
+ THROW_IE_EXCEPTION << "Scalar node has an invalid type " << type;
+ }
+
+ kp.argName = XMLParseUtils::GetStrAttr(scalar, "arg-name");
+ kp.portIndex = XMLParseUtils::GetIntAttr(scalar, "port-index", -1);
+ kp.irSource = XMLParseUtils::GetStrAttr(scalar, "source", "");
+
+ _kernelParams.push_back(std::move(kp));
+ }
+}
+
+void CustomKernel::processWorkSizesNode(const pugi::xml_node& node) {
+ const auto workSizes = node.child("WorkSizes");
+
+ const auto dims = XMLParseUtils::GetStrAttr(workSizes, "dim");
+ std::tie(_wgDimSource, _wgDimIdx) = parseDimSource(dims);
+
+ const auto gwgs = XMLParseUtils::GetStrAttr(workSizes, "global");
+ _globalGridSizeRules = parseSizeRule(gwgs);
+
+ const auto lwgs = XMLParseUtils::GetStrAttr(workSizes, "local");
+ _localGridSizeRules = parseSizeRule(lwgs);
+}
+
+} // namespace vpu
//
#include <vpu/frontend/custom_layer.hpp>
+#include <vpu/utils/numeric.hpp>
#include <climits>
#include <streambuf>
#include <tuple>
#include <utility>
-#include <memory>
#include <string>
#include <vector>
#include <vpu/utils/simple_math.hpp>
#include <vpu/utils/error.hpp>
-#include <vpu/utils/extra.hpp>
+#include <cstring>
namespace vpu {
namespace {
-VPU_PACKED(Elf32Ehdr {
- uint8_t offs1[28];
- uint32_t ePhoff; // Program header offset
- uint32_t eShoff; // Section header offset
- uint8_t offs2[12];
- uint16_t eShnum; // Number of sections
- uint16_t offs3;
-};)
-
-VPU_PACKED(Elf32Section {
- uint32_t shName;
- uint32_t shType;
- uint32_t shFlags;
- uint32_t shAddr;
- uint32_t shOffset;
- uint32_t shSize;
- uint32_t shLink;
- uint32_t shInfo;
- uint32_t shAddralign;
- uint32_t shEntsize;
-};)
-
-VPU_PACKED(Elf32Phdr {
- uint32_t pType; // Identifies program segment type
- uint32_t pOffset; // Segment file offset
- uint32_t pVaddr; // Segment virtual address
- uint32_t pPaddr; // Segment physical address
- uint32_t pFilesz; // Segment size in file
- uint32_t pMemsz; // Segment size in memory
- uint32_t pFlags; // Flags position from ELF standard spec
- uint32_t pAlign; // Segment alignment, file & memory
-};)
-
-VPU_PACKED(Elf32Sym {
- uint32_t stName;
- uint32_t stValue;
- uint32_t stSize;
- uint8_t stInfo;
- uint8_t stOther;
- uint16_t stShndx;
-};)
-
-VPU_PACKED(KernelHdr {
- uint32_t address; // Kernel address
- uint32_t flags; // Should be 0 for now
- uint32_t sectionSize; // Section size, offset to the next kernel
- uint32_t argOffset; // offset to arguments
- uint32_t stackSize; // Size of the stack required for kernel
- uint32_t stackSizeWI; // Size of the stack required for kernel per WI
-};)
-
-VPU_PACKED(KernelArgHdr {
- uint32_t stringOffset;
- uint32_t addressSpace;
- uint32_t typeOffset;
- uint32_t size;
- uint32_t laneSize;
-};)
-
-enum Flags {
- CL_Vecz = 0x01,
- CL_Unrolled = 0x02,
- CL_Predicated = 0x04,
- CL_Dma = 0x08,
- CL_VeczDma = 0x10
-};
-
-std::pair<const Elf32Section*, const Elf32Section*> findSymbolTable(
- const char* ELFData) {
- const uint32_t SYMTAB = 2; // Link editing symbol table
- const uint32_t STRTAB = 3; // A string table
-
- IE_ASSERT(ELFData != nullptr);
-
- auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
- auto shdr = reinterpret_cast<const Elf32Section*>(ELFData + ehdr->eShoff);
-
- const Elf32Section* strShdr = nullptr;
- const Elf32Section* symShdr = nullptr;
- for (size_t i = 0; i < ehdr->eShnum; i++) {
- if (shdr[i].shType == STRTAB && strShdr == nullptr) {
- strShdr = &shdr[i];
- } else if (shdr[i].shType == SYMTAB && symShdr == nullptr) {
- symShdr = &shdr[i];
- }
-
- if (symShdr != nullptr && strShdr != nullptr)
- break;
- }
- IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
-
- return std::make_pair(strShdr, symShdr);
-}
-
-uint32_t getKernelEntry(const char* ELFData, const std::string& kernelName) {
- ie::details::CaselessEq<std::string> cmp;
-
- IE_ASSERT(ELFData != nullptr);
-
- auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
- auto phdr = reinterpret_cast<const Elf32Phdr*>(ELFData + ehdr->ePhoff);
-
- const Elf32Section* strShdr = nullptr;
- const Elf32Section* symShdr = nullptr;
- std::tie(strShdr, symShdr) = findSymbolTable(ELFData);
- IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
-
- auto numSymEntries = symShdr->shSize / symShdr->shEntsize;
- auto sym = reinterpret_cast<const Elf32Sym*>(ELFData + symShdr->shOffset);
- auto firstStr = ELFData + strShdr->shOffset;
-
- for (size_t i = 0; i < numSymEntries; i++) {
- if (cmp(firstStr + sym[i].stName, kernelName)) {
- return sym[i].stValue - phdr->pVaddr;
- }
+void assertExactlyOneOccurrence(const pugi::xml_node &node, const SmallVector<std::string>& childs) {
+ for (const auto &name : childs) {
+ const auto& child = node.child(name.c_str());
+ VPU_THROW_UNLESS(!child.empty(), "Required parameter %s is not found", name);
+ VPU_THROW_UNLESS(child.next_sibling(name.c_str()).empty(),
+ "Found several definitions of the parameter %s", name);
}
-
- VPU_THROW_EXCEPTION << "Cannot find kernel entry point for custom kernel " << kernelName;
}
-SmallVector<std::string> deduceKernelParameters(
- const char* ELFData,
- uint32_t kernelAddress) {
- ie::details::CaselessEq<std::string> cmp;
- IE_ASSERT(ELFData != nullptr);
-
- auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
- auto phdr = reinterpret_cast<const Elf32Phdr*>(ELFData + ehdr->ePhoff);
- auto shdr = reinterpret_cast<const Elf32Section*>(ELFData + ehdr->eShoff);
-
- const Elf32Section* strShdr = nullptr;
- const Elf32Section* symShdr = nullptr;
- std::tie(strShdr, symShdr) = findSymbolTable(ELFData);
- IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
-
- auto numSymEntries = symShdr->shSize / symShdr->shEntsize;
- auto sym = reinterpret_cast<const Elf32Sym*>(ELFData + symShdr->shOffset);
- auto firstStr = ELFData + strShdr->shOffset;
-
- const char* kernelArgStrings = nullptr;
- for (size_t i = 0; i < numSymEntries; i++) {
- if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.strings")) {
- kernelArgStrings = ELFData + shdr[sym[i].stShndx].shOffset;
- break;
- }
- }
- IE_ASSERT(kernelArgStrings != nullptr);
-
- SmallVector<std::string> parameters;
- for (size_t i = 0; i < numSymEntries; i++) {
- if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.info")) {
- auto ptr = ELFData + shdr[sym[i].stShndx].shOffset;
- auto numKernels = *reinterpret_cast<const int*>(ptr);
-
- auto metaOffset = sizeof(int);
- for (int k = 0; k < numKernels; k++) {
- auto kHdr = reinterpret_cast<const KernelHdr*>(ptr + metaOffset);
-
- if (kHdr->address-phdr->pVaddr == kernelAddress) {
- auto aHdr = reinterpret_cast<const KernelArgHdr*>(
- reinterpret_cast<const char*>(&(kHdr->argOffset)) + sizeof(kHdr->argOffset) + kHdr->argOffset);
-
- auto numArgs = reinterpret_cast<const int*>(aHdr)[-1];
- for (int n = 0; n < numArgs; n++, aHdr++) {
- parameters.push_back(kernelArgStrings + aHdr->stringOffset);
- }
-
- break;
- }
-
- metaOffset += kHdr->sectionSize + sizeof(kHdr->address) + sizeof(kHdr->flags);
- }
- }
+void assertOneOrMoreOccurrence(const pugi::xml_node &node, const SmallVector<std::string>& childs) {
+ for (const auto& name : childs) {
+ const auto& child = node.child(name.c_str());
+ VPU_THROW_UNLESS(!child.empty(),
+ "Required parameter %s is not found", name);
}
-
- return parameters;
}
-std::pair<uint32_t, uint32_t> deduceVectorized(
- const char* ELFData,
- uint32_t kernelAddress) {
- ie::details::CaselessEq<std::string> cmp;
-
- IE_ASSERT(ELFData != nullptr);
-
- auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
- auto phdr = reinterpret_cast<const Elf32Phdr*>(ELFData + ehdr->ePhoff);
- auto shdr = reinterpret_cast<const Elf32Section*>(ELFData + ehdr->eShoff);
-
- const Elf32Section* strShdr = nullptr;
- const Elf32Section* symShdr = nullptr;
- std::tie(strShdr, symShdr) = findSymbolTable(ELFData);
- IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
-
- auto numSymEntries = symShdr->shSize / symShdr->shEntsize;
- auto sym = reinterpret_cast<const Elf32Sym*>(ELFData + symShdr->shOffset);
- auto firstStr = ELFData + strShdr->shOffset;
-
- const char* kernelArgStrings = nullptr;
- for (size_t i = 0; i < numSymEntries; i++) {
- if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.strings")) {
- kernelArgStrings = ELFData + shdr[sym[i].stShndx].shOffset;
- break;
- }
- }
- IE_ASSERT(kernelArgStrings != nullptr);
-
- for (size_t i = 0; i < numSymEntries; i++) {
- if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.info")) {
- auto ptr = ELFData + shdr[sym[i].stShndx].shOffset;
- auto numKernels = *reinterpret_cast<const int*>(ptr);
-
- auto metaOffset = sizeof(int);
- for (int k = 0; k < numKernels; k++) {
- auto kHdr = reinterpret_cast<const KernelHdr*>(ptr + metaOffset);
-
- if (kHdr->address-phdr->pVaddr == kernelAddress && kHdr->flags == 1) {
- auto vecInfo = reinterpret_cast<const uint32_t*>(kHdr + 1);
- return std::make_pair(vecInfo[1], vecInfo[0]-phdr->pVaddr);
- }
-
- metaOffset += kHdr->sectionSize + sizeof(kHdr->address) + sizeof(kHdr->flags);
- }
- }
+void assertZeroOrOneOccurrence(const pugi::xml_node& node, const SmallVector<std::string>& childNames) {
+ for (const auto& name : childNames) {
+ const auto& child = node.child(name.c_str());
+ VPU_THROW_UNLESS(!child.empty() || child.next_sibling(name.c_str()).empty(),
+ "Found several definitions of the parameter %s", name);
}
-
- return std::make_pair(0, 0);
}
-int32_t getKernelId(
- const char* ELFData,
- uint32_t kernelAddress) {
- ie::details::CaselessEq<std::string> cmp;
-
- IE_ASSERT(ELFData != nullptr);
-
- auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
- auto phdr = reinterpret_cast<const Elf32Phdr*>(ELFData + ehdr->ePhoff);
- auto shdr = reinterpret_cast<const Elf32Section*>(ELFData + ehdr->eShoff);
-
- const Elf32Section* strShdr = nullptr;
- const Elf32Section* symShdr = nullptr;
- std::tie(strShdr, symShdr) = findSymbolTable(ELFData);
- IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
-
- auto numSymEntries = symShdr->shSize / symShdr->shEntsize;
- auto sym = reinterpret_cast<const Elf32Sym*>(ELFData + symShdr->shOffset);
- auto firstStr = ELFData + strShdr->shOffset;
-
- const char* kernelArgStrings = nullptr;
- for (size_t i = 0; i < numSymEntries; i++) {
- if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.strings")) {
- kernelArgStrings = ELFData + shdr[sym[i].stShndx].shOffset;
- break;
+void assertNoEmptyAttributes(const pugi::xml_node& customLayer) {
+ const auto checkAttributes = [&customLayer](const pugi::xml_node& node) {
+ for (const auto& attr : node.attributes()) {
+ VPU_THROW_UNLESS(strlen(attr.value()) != 0,
+ "Wrong custom layer XML: Custom layer %s has node <%s> with an empty attribute %s",
+ customLayer.attribute("name").value(), node.name(), attr.name());
}
- }
- IE_ASSERT(kernelArgStrings != nullptr);
-
- for (size_t i = 0; i < numSymEntries; i++) {
- if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.info")) {
- auto ptr = ELFData + shdr[sym[i].stShndx].shOffset;
- auto numKernels = *reinterpret_cast<const int*>(ptr);
-
- auto metaOffset = sizeof(int);
- for (int k = 0; k < numKernels; k++) {
- auto kHdr = reinterpret_cast<const KernelHdr*>(ptr + metaOffset);
+ };
- if (kHdr->address-phdr->pVaddr == kernelAddress) {
- return k;
- }
+ checkAttributes(customLayer);
- metaOffset += kHdr->sectionSize + sizeof(kHdr->address) + sizeof(kHdr->flags);
- }
- }
+ for (const auto& child : customLayer.children()) {
+ assertNoEmptyAttributes(child);
}
-
- return -1;
}
} // namespace
ie::details::caseless_map<std::string, std::vector<CustomLayer::Ptr>> CustomLayer::loadFromFile(
const std::string& configFile,
bool canBeMissed) {
- ie::details::caseless_map<std::string, std::vector<CustomLayer::Ptr>> out;
-
pugi::xml_document xmlDoc;
pugi::xml_parse_result res = xmlDoc.load_file(configFile.c_str());
if (res.status != pugi::status_ok) {
if (canBeMissed) {
// Config file might not exist - like global config, for example.
- return out;
+ return {};
} else {
- VPU_THROW_EXCEPTION
- << "Failed to load custom layer configuration file " << configFile
- << " : " << res.description()
- << " at offset " << res.offset;
+ VPU_THROW_FORMAT("Failed to load custom layer configuration file %s : %s at offset %s",
+ configFile, res.description(), res.offset);
}
}
auto abs_path_ptr = realpath(configFile.c_str(), path);
#endif
- if (abs_path_ptr == nullptr) {
- VPU_THROW_EXCEPTION
- << "Failed to load custom layer configuration file " << configFile
- << " : can't get canonicalized absolute path";
- }
+ VPU_THROW_UNLESS(abs_path_ptr != nullptr,
+ "Failed to load custom layer configuration file %s : can't get canonicalized absolute path", configFile);
std::string abs_file_name(path);
// Try extracting directory from config path.
auto dir_split_pos = abs_file_name.find_last_of("/\\");
- auto colon_pos = abs_file_name.find_first_of(":");
- auto first_slash_pos = abs_file_name.find_first_of("/");
+ auto colon_pos = abs_file_name.find_first_of(':');
+ auto first_slash_pos = abs_file_name.find_first_of('/');
// If path is absolute.
std::string dir_path;
<< " : path is not valid";
}
+ auto out = ie::details::caseless_map<std::string, std::vector<CustomLayer::Ptr>> {};
for (auto r = xmlDoc.document_element(); r; r = r.next_sibling()) {
- CustomLayer::Ptr layer(new CustomLayer(dir_path));
-
- layer->loadSingleLayer(r);
-
- out[layer->_layerName].push_back(layer);
+ auto layerPtr = std::make_shared<CustomLayer>(dir_path, r);
+ out[layerPtr->_layerName].push_back(std::move(layerPtr));
}
return out;
}
-int CustomLayer::maxShaves() const {
- return _maxShaves;
-}
-
-void CustomLayer::setStageNumInputs(int id) {
- _stageNumInputs = id;
-}
-
-int CustomLayer::stageNumInputs() const {
- return _stageNumInputs;
-}
-
-uint32_t CustomLayer::kernelAddress(int idx) const {
- for (const auto& x : _kernelAddress) {
- if ((checked_cast<uint32_t>(idx) % x.first) == 0) {
- return x.second;
- }
- }
-
- auto it = _kernelAddress.find(1);
- IE_ASSERT(it != _kernelAddress.end());
-
- return it->second;
-}
-
-int CustomLayer::kernelId() const {
- uint32_t kernelAddress = getKernelEntry(&_kernelBinary[0], _kernelEntry);
- return getKernelId(&_kernelBinary[0], kernelAddress);
-}
-
-void CustomLayer::loadSingleLayer(const pugi::xml_node& node) {
- ie::details::CaselessEq<std::string> cmp;
-
- std::string nodeName(node.name());
- if (!cmp(nodeName, "CustomLayer")) {
- VPU_THROW_EXCEPTION << "Wrong custom layer XML : Node is not CustomLayer, but " << nodeName;
- }
-
- auto nodeType = XMLParseUtils::GetStrAttr(node, "type", "");
- if (!cmp(nodeType, "MVCL")) {
- VPU_THROW_EXCEPTION << "Wrong custom layer XML : Type is not MVCL, but " << nodeType;
- }
-
- auto version = XMLParseUtils::GetIntAttr(node, "version", -1);
- IE_ASSERT(version == 1);
-
- auto layerStage = XMLParseUtils::GetStrAttr(node, "stage", "");
- auto layerName = XMLParseUtils::GetStrAttr(node, "name", "");
- if (layerName.empty()) {
- VPU_THROW_EXCEPTION << "Missing Layer name in CustomLayer";
- }
- _layerName = layerStage.empty() ? layerName : layerName + "@stage_" + layerStage;
-
- _maxShaves = XMLParseUtils::GetIntAttr(node, "max-shaves", 0);
-
- processWhere(node.child("Where"));
-
- processKernelNode(node.child("Kernel"));
-
- processParametersNode(node.child("Parameters"));
-
- processWorkSizesNode(node.child("WorkSizes"));
-}
-
-void CustomLayer::processWhere(const pugi::xml_node& node) {
- for (auto child : node.attributes()) {
- _whereParams[child.name()] = child.value();
- }
-}
-
-void CustomLayer::processKernelNode(const pugi::xml_node& node) {
- ie::details::CaselessEq<std::string> cmp;
-
- std::string nodeName(node.name());
- if (!cmp(nodeName, "Kernel")) {
- VPU_THROW_EXCEPTION << "Wrong node, expected Kernel found " << nodeName;
- }
-
- if (!_kernelBinary.empty()) {
- VPU_THROW_EXCEPTION << "Multiple definition of Kernel";
- }
-
- _kernelEntry = XMLParseUtils::GetStrAttr(node, "entry", "");
- if (_kernelEntry.empty()) {
- VPU_THROW_EXCEPTION << "No Kernel entry in custom layer";
- }
-
- _kernelBinary.clear();
- for (auto sourceNode = node.child("Source"); !sourceNode.empty(); sourceNode = sourceNode.next_sibling("Source")) {
- auto fileName = _configDir + "/" + XMLParseUtils::GetStrAttr(sourceNode, "filename", "");
-
- std::ifstream inputFile(fileName, std::ios::binary);
- if (!inputFile.is_open()) {
- VPU_THROW_EXCEPTION << "Couldn't open kernel file " << fileName;
- }
- std::ostringstream contentStream;
- contentStream << inputFile.rdbuf();
- _kernelBinary.append(contentStream.str());
- }
+CustomLayer::CustomLayer(std::string configDir, const pugi::xml_node& customLayer) : _configDir(std::move(configDir)) {
+ const auto cmp = ie::details::CaselessEq<std::string>{};
+ const auto nodeName = customLayer.name();
+ VPU_THROW_UNLESS(cmp(nodeName, "CustomLayer"),
+ "Wrong custom layer XML : Node is not CustomLayer, but %s", nodeName);
- _kernelAddress[1] = getKernelEntry(&_kernelBinary[0], _kernelEntry);
+ const auto nodeType = XMLParseUtils::GetStrAttr(customLayer, "type");
+ VPU_THROW_UNLESS(cmp(nodeType, "MVCL"),
+ "Wrong custom layer XML : Type is not MVCL, but %s", nodeType);
- _parameters = deduceKernelParameters(&_kernelBinary[0], _kernelAddress[1]);
+ const auto version = XMLParseUtils::GetIntAttr(customLayer, "version");
+ VPU_THROW_UNLESS(version == 1, "Wrong custom layer XML : only version 1 is supported");
- auto vecInfo = deduceVectorized(&_kernelBinary[0], _kernelAddress[1]);
- if (vecInfo.first != 0) {
- _kernelAddress[vecInfo.first] = vecInfo.second;
- }
-}
+ _layerName = XMLParseUtils::GetStrAttr(customLayer, "name");
-void CustomLayer::processParametersNode(const pugi::xml_node& node) {
- ie::details::CaselessEq<std::string> cmp;
+ assertNoEmptyAttributes(customLayer);
- std::string nodeName(node.name());
- if (!cmp(nodeName, "Parameters")) {
- VPU_THROW_EXCEPTION << "Wrong node, expected Parameters found " << nodeName;
+ assertZeroOrOneOccurrence(customLayer, {"Where"});
+ const auto whereNode = customLayer.child("Where");
+ for (auto where : whereNode.attributes()) {
+ _whereParams[where.name()] = where.value();
}
- for (auto tensorNode = node.child("Tensor"); !tensorNode.empty(); tensorNode = tensorNode.next_sibling("Tensor")) {
- KernelParam kp;
-
- auto typeStr = XMLParseUtils::GetStrAttr(tensorNode, "type");
- if (cmp(typeStr, "input")) {
- kp.type = CustomParamType::Input;
- } else if (cmp(typeStr, "output")) {
- kp.type = CustomParamType::Output;
- } else if (cmp(typeStr, "input_buffer")) {
- kp.type = CustomParamType::InputBuffer;
- } else if (cmp(typeStr, "output_buffer")) {
- kp.type = CustomParamType::OutputBuffer;
- } else if (cmp(typeStr, "data")) {
- kp.type = CustomParamType::Data;
- } else {
- VPU_THROW_EXCEPTION << "Tensor node has an invalid type " << typeStr;
+ assertOneOrMoreOccurrence(customLayer, {"Kernel"});
+ auto kernelNodes = [&] {
+ auto nodes = SmallVector<pugi::xml_node>{};
+ for (auto kernel = customLayer.child("Kernel"); !kernel.empty(); kernel = kernel.next_sibling("Kernel")) {
+ assertExactlyOneOccurrence(kernel, {"Parameters", "WorkSizes"});
+ assertOneOrMoreOccurrence(kernel, {"Source"});
+ nodes.push_back(kernel);
}
+ return nodes;
+ }();
- kp.format = formatFromString(XMLParseUtils::GetStrAttr(tensorNode, "format", "BFYX"));
- if (kp.format == CustomDataFormat::None) {
- VPU_THROW_EXCEPTION << "Tensor node has an invalid format " << kp.format;
- }
+ if (kernelNodes.size() == 1) {
+ _kernels.emplace_back(kernelNodes.front(), _configDir);
+ } else {
+ auto stageOrder = std::map<int, CustomKernel>{};
+ for (auto& kernel : kernelNodes) {
+ const auto stageAttr = kernel.attribute("stage");
+ VPU_THROW_UNLESS(stageAttr, "Error while binding %s custom layer: for multi-kernel binding, "
+ "each kernel should be provided with 'stage' attribute.", _layerName);
- kp.argName = XMLParseUtils::GetStrAttr(tensorNode, "arg-name");
- if (kp.argName.empty()) {
- VPU_THROW_EXCEPTION << "Tensor node has no arg-name";
- }
+ const auto stageNum = std::stod(stageAttr.value());
+ VPU_THROW_UNLESS(stageOrder.find(stageNum) == stageOrder.end(),
+ "Error while binding %s custom layer: found duplicating stage id.", _layerName);
- kp.portIndex = XMLParseUtils::GetIntAttr(tensorNode, "port-index", -1);
- if (kp.portIndex == -1) {
- VPU_THROW_EXCEPTION << "Tensor node has no port-index";
+ stageOrder.emplace(stageNum, CustomKernel{kernel, _configDir});
}
- if (kp.type == CustomParamType::InputBuffer || kp.type == CustomParamType::OutputBuffer) {
- std::string bufferSize(XMLParseUtils::GetStrAttr(tensorNode, "size", ""));
- while (!bufferSize.empty()) {
- auto pos = bufferSize.find_first_of(',');
- auto rule = bufferSize.substr(0, pos);
- if (!isLegalSizeRule(rule)) {
- VPU_THROW_EXCEPTION << "Invalid BufferSize " << rule;
- }
-
- kp.bufferSizeRules.emplace_back(std::move(rule));
-
- if (pos == std::string::npos) {
- bufferSize.clear();
- } else {
- bufferSize = bufferSize.substr(pos + 1, std::string::npos);
- }
- }
-
- kp.dimIdx = -1;
- std::string dim_src_string(XMLParseUtils::GetStrAttr(tensorNode, "dim", ""));
- if (!dim_src_string.empty()) {
- // Try to locate index separator.
- auto pos = dim_src_string.find_first_of(',');
- auto flag = dim_src_string.substr(0, pos);
- if (cmp(flag, "input")) {
- kp.dimSource = CustomDimSource::Input;
- } else if (cmp(flag, "output")) {
- kp.dimSource = CustomDimSource::Output;
- } else {
- VPU_THROW_EXCEPTION << "Invalid WG dim source " << flag;
- }
-
- int idx = 0;
- if (pos != std::string::npos) {
- // User explicitly set input index in config.
- auto idx_string = dim_src_string.substr(pos + 1, std::string::npos);
- idx = std::stoi(idx_string);
- }
- if (idx < 0) {
- VPU_THROW_EXCEPTION << "Invalid tensor index " << idx;
- }
+ VPU_THROW_UNLESS(stageOrder.begin()->first == 0,
+ "Error while binding %s custom layer: Stage 0 is not found.", _layerName);
+ VPU_THROW_UNLESS(stageOrder.rbegin()->first == stageOrder.size() - 1,
+ "Error while binding %s custom layer: Kernels should have stage id from 0 to N.", _layerName);
- kp.dimIdx = idx;
- }
+ for (auto& stage : stageOrder) {
+ _kernels.push_back(std::move(stage.second));
}
-
- kp.irSource.clear();
-
- _kernelParams.emplace_back(std::move(kp));
}
- for (auto dataNode = node.child("Data"); !dataNode.empty(); dataNode = dataNode.next_sibling("Data")) {
- KernelParam kp;
-
- auto typeStr = XMLParseUtils::GetStrAttr(dataNode, "type");
- if (cmp(typeStr, "data")) {
- kp.type = CustomParamType::Data;
- } else if (cmp(typeStr, "local_data")) {
- kp.type = CustomParamType::LocalData;
- } else {
- VPU_THROW_EXCEPTION << "Data node has an invalid type " << typeStr;
- }
-
- kp.format = CustomDataFormat::Any;
-
- kp.argName = XMLParseUtils::GetStrAttr(dataNode, "arg-name");
- if (kp.argName.empty()) {
- VPU_THROW_EXCEPTION << "Data node has no arg-name";
- }
-
- kp.portIndex = -1;
-
- kp.irSource = XMLParseUtils::GetStrAttr(dataNode, "source", "");
- std::string dim_src_string(XMLParseUtils::GetStrAttr(dataNode, "dim", ""));
-
- if (kp.irSource.empty() && dim_src_string.empty()) {
- VPU_THROW_EXCEPTION << "Data node has no source or dim";
+ const auto addPorts = [](std::map<int, CustomDataFormat>& ports, const CustomKernel::KernelParam& newEdge) {
+ const auto layerInput = ports.find(newEdge.portIndex);
+ if (layerInput == ports.end()) {
+ ports.emplace(newEdge.portIndex, newEdge.format);
+ } else if (newEdge.format == CustomDataFormat::Any) {
+ return;
+ } else if (layerInput->second == CustomDataFormat::Any) {
+ layerInput->second = newEdge.format;
}
+ };
- if (!kp.irSource.empty() && !dim_src_string.empty()) {
- VPU_THROW_EXCEPTION << "Data node can only have source or dim";
- }
-
- kp.dimIdx = -1;
- if (kp.type == CustomParamType::LocalData) {
- std::string bufferSize(XMLParseUtils::GetStrAttr(dataNode, "size", ""));
- while (!bufferSize.empty()) {
- auto pos = bufferSize.find_first_of(',');
- auto rule = bufferSize.substr(0, pos);
- if (!isLegalSizeRule(rule)) {
- VPU_THROW_EXCEPTION << "Invalid BufferSize " << rule;
- }
-
- kp.bufferSizeRules.emplace_back(std::move(rule));
-
- if (pos == std::string::npos) {
- bufferSize.clear();
- } else {
- bufferSize = bufferSize.substr(pos + 1, std::string::npos);
- }
+ for (const auto& kernel : _kernels) {
+ for (const auto& binding : kernel.bindings()) {
+ if (binding.type == CustomParamType::Input) {
+ addPorts(_inputs, binding);
}
-
- kp.dimIdx = -1;
- std::string dim_src_string(XMLParseUtils::GetStrAttr(dataNode, "dim", ""));
- if (!dim_src_string.empty()) {
- // Try to locate index separator.
- auto pos = dim_src_string.find_first_of(',');
- auto flag = dim_src_string.substr(0, pos);
- if (cmp(flag, "input")) {
- kp.dimSource = CustomDimSource::Input;
- } else if (cmp(flag, "output")) {
- kp.dimSource = CustomDimSource::Output;
- } else {
- VPU_THROW_EXCEPTION << "Invalid WG dim source " << flag;
- }
-
- int idx = 0;
- if (pos != std::string::npos) {
- // User explicitly set input index in config.
- auto idx_string = dim_src_string.substr(pos + 1, std::string::npos);
- idx = std::stoi(idx_string);
- }
- if (idx < 0) {
- VPU_THROW_EXCEPTION << "Invalid tensor index " << idx;
- }
-
- kp.dimIdx = idx;
+ if (binding.type == CustomParamType::Output) {
+ addPorts(_outputs, binding);
}
}
-
- _kernelParams.emplace_back(std::move(kp));
- }
-
- for (auto scalarNode = node.child("Scalar"); !scalarNode.empty(); scalarNode = scalarNode.next_sibling("Scalar")) {
- KernelParam kp;
-
- std::string typeStr = XMLParseUtils::GetStrAttr(scalarNode, "type");
- if (cmp(typeStr, "int")) {
- kp.type = CustomParamType::Int;
- } else if (cmp(typeStr, "float")) {
- kp.type = CustomParamType::Float;
- } else {
- VPU_THROW_EXCEPTION << "Scalar node has an invalid type " << typeStr;
- }
-
- kp.format = CustomDataFormat::Any;
-
- kp.argName = XMLParseUtils::GetStrAttr(scalarNode, "arg-name");
- if (kp.argName.empty()) {
- VPU_THROW_EXCEPTION << "Scalar node has no arg-name";
- }
-
- kp.portIndex = XMLParseUtils::GetIntAttr(scalarNode, "port-index", 0);
-
- kp.irSource = XMLParseUtils::GetStrAttr(scalarNode, "source", "");
- if (kp.irSource.empty()) {
- VPU_THROW_EXCEPTION << "Scalar node has no source";
- }
-
- _kernelParams.emplace_back(std::move(kp));
}
}
-void CustomLayer::processWorkSizesNode(const pugi::xml_node & node) {
- ie::details::CaselessEq<std::string> cmp;
-
- std::string nodeName(node.name());
- if (!cmp(node.name(), "WorkSizes")) {
- VPU_THROW_EXCEPTION << "Wrong node, expected WorkSizes found " << nodeName;
- }
-
- _wgDimIdx = -1;
- std::string dim_src_string(node.attribute("dim").as_string(""));
- if (!dim_src_string.empty()) {
- // Try to locate index separator.
- auto pos = dim_src_string.find_first_of(',');
- auto flag = dim_src_string.substr(0, pos);
- if (cmp(flag, "input")) {
- _wgDimSource = CustomDimSource::Input;
- } else if (cmp(flag, "output")) {
- _wgDimSource = CustomDimSource::Output;
- } else {
- VPU_THROW_EXCEPTION << "Invalid WG dim source " << flag;
- }
-
- int idx = 0;
- if (pos != std::string::npos) {
- // User explicitly set input index in config.
- auto idx_string = dim_src_string.substr(pos + 1, std::string::npos);
- idx = std::stoi(idx_string);
- }
- if (idx < 0) {
- VPU_THROW_EXCEPTION << "Invalid tensor index " << idx;
- }
-
- _wgDimIdx = idx;
- }
-
- std::string gws(node.attribute("global").as_string(""));
- while (!gws.empty()) {
- auto pos = gws.find_first_of(',');
- auto rule = gws.substr(0, pos);
- if (!isLegalSizeRule(rule)) {
- VPU_THROW_EXCEPTION << "Invalid WorkSize " << rule;
- }
-
- _globalSizeRules.emplace_back(std::move(rule));
+bool CustomLayer::isLegalSizeRule(const std::string& rule, std::map<std::string, std::string> layerParams) {
+ {
+ auto sizes = SmallVector<std::pair<std::string, std::string>> {
+ { "b", "1" }, { "B", "1" },
+ { "f", "1" }, { "F", "1" },
+ { "y", "1" }, { "Y", "1" },
+ { "x", "1" }, { "X", "1" },
+ };
- if (pos == std::string::npos) {
- gws.clear();
- } else {
- gws = gws.substr(pos + 1, std::string::npos);
- }
- }
-
- std::string lws(node.attribute("local").as_string(""));
- while (!lws.empty()) {
- auto pos = lws.find_first_of(',');
- auto rule = lws.substr(0, pos);
- if (!isLegalSizeRule(rule)) {
- VPU_THROW_EXCEPTION << "Invalid WorkSize " << rule;
- }
-
- _localSizeRules.emplace_back(std::move(rule));
-
- if (pos == std::string::npos) {
- lws.clear();
- } else {
- lws = lws.substr(pos + 1, std::string::npos);
- }
+ std::move(begin(sizes), end(sizes), inserter(layerParams, end(layerParams)));
}
-}
-
-bool CustomLayer::isLegalSizeRule(const std::string& rule) {
- SimpleMathExpression expr;
- expr.setVariables({
- { 'b', 1 }, { 'B', 1 },
- { 'f', 1 }, { 'F', 1 },
- { 'y', 1 }, { 'Y', 1 },
- { 'x', 1 }, { 'X', 1 },
- });
+ MathExpression expr;
+ expr.setVariables(layerParams);
try {
expr.parse(rule);
return true;
}
-CustomDataFormat CustomLayer::formatFromString(const std::string & str) {
- static const ie::details::caseless_map<std::string, CustomDataFormat> FormatNameToType = {
- { "BFYX" , CustomDataFormat::BFYX },
- { "BYXF" , CustomDataFormat::BYXF },
- { "FYX" , CustomDataFormat::FYX },
- { "YXF" , CustomDataFormat::YXF },
- { "ANY" , CustomDataFormat::Any },
+CustomDataFormat CustomLayer::formatFromLayout(const InferenceEngine::Layout& layout) {
+ const auto layoutToFormat = std::map<ie::Layout, CustomDataFormat> {
+ { ie::NCHW , CustomDataFormat::BFYX },
+ { ie::NHWC , CustomDataFormat::BYXF },
+ { ie::CHW , CustomDataFormat::FYX },
+ { ie::NC , CustomDataFormat::BF },
+ { ie::ANY , CustomDataFormat::Any }
};
- auto it = FormatNameToType.find(str);
- if (it != FormatNameToType.end()) {
- return it->second;
- }
+ const auto it = layoutToFormat.find(layout);
+ VPU_THROW_UNLESS(it != layoutToFormat.end(), "Tensor node has an invalid format %s", layout);
+ return it->second;
+}
+
+bool CustomLayer::meetsWhereRestrictions(const std::map<std::string, std::string>& params) const {
+ const auto cmp = ie::details::CaselessEq<std::string>{};
+
+ for (const auto& where : _whereParams) {
+ const auto restrictedParam = [&](const std::pair<std::string, std::string>& param) {
+ return param.first == where.first;
+ };
+
+ const auto param = std::find_if(begin(params), end(params), restrictedParam);
+ if (param == params.end()) {
+ return false;
+ }
- return CustomDataFormat::None;
+ const auto& restriction = where.second;
+ const auto number = parseNumber<float>(param->second);
+
+ const auto meetsRestriction = [&] {
+            // Compare non-numeric restrictions verbatim (e.g. kernel="3,3")
+ if (!number.hasValue()) {
+ return cmp(param->second, restriction);
+ } else {
+ if (restriction[0] == '>' && restriction[1] == '=') {
+ const auto to_compare = std::stof(restriction.substr(2, std::string::npos));
+ return number.get() >= to_compare;
+ } else if (restriction[0] == '<' && restriction[1] == '=') {
+ const auto to_compare = std::stof(restriction.substr(2, std::string::npos));
+ return number.get() <= to_compare;
+ } else if (restriction[0] == '>') {
+ const auto to_compare = std::stof(restriction.substr(1, std::string::npos));
+ return number.get() > to_compare;
+ } else if (restriction[0] == '<') {
+ const auto to_compare = std::stof(restriction.substr(1, std::string::npos));
+ return number.get() < to_compare;
+ } else if (restriction[0] == '!' && restriction[1] == '=') {
+ const auto to_compare = std::stof(restriction.substr(2, std::string::npos));
+ return number.get() != to_compare;
+ }
+ return number.get() == std::stof(restriction);
+ }
+ }();
+
+ if (!meetsRestriction) {
+ return false;
+ }
+ }
+ return true;
}
} // namespace vpu
#include <map>
#include <vector>
#include <utility>
+#include <string>
+
+#include <convert_function_to_cnn_network.hpp>
+#include <generic_ie.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp>
+#include <transformations/convert_opset2_to_opset1/convert_opset2_to_opset1.hpp>
+#include <transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
namespace vpu {
{"StaticShapeNonZero", LAYER_PARSER(parseNonZero)},
{"ROIAlign", LAYER_PARSER(parseROIAlign)},
{"DynamicShapeResolver", LAYER_PARSER(parseDSR)},
+ {"OutShapeOfReshape", LAYER_PARSER(parseOutShapeOfReshape)},
+ {"StaticShapeBroadcast", LAYER_PARSER(parseBroadcast)},
}} {}
ModelPtr FrontEnd::buildInitialModel(ie::ICNNNetwork& network) {
std::atomic<int> g_counter(0);
-bool hasSuitableCustom(
- const std::vector<CustomLayer::Ptr>& customLayers,
- const ie::CNNLayerPtr& layer) {
- const auto& env = CompileEnv::get();
- ie::details::CaselessEq<std::string> cmp;
+} // namespace
- env.log->trace("Check for suitable custom implementation for layer %s:%s", layer->name, layer->type);
+CustomLayer::Ptr FrontEnd::getSuitableCustomLayer(const std::vector<CustomLayer::Ptr>& customLayers,
+ const ie::CNNLayerPtr& cnnLayer) {
+ const auto& env = CompileEnv::get();
+ env.log->trace("Check for suitable custom implementation for layer %s:%s",
+ cnnLayer->name, cnnLayer->type);
VPU_LOGGER_SECTION(env.log);
- for (const auto& customLayer : customLayers) {
- env.log->trace("Check next custom layer : %v", customLayer->whereParams());
+ const auto cnnInputs = [&] {
+ auto inputs = SmallVector<CustomDataFormat>{};
+ inputs.reserve(cnnLayer->insData.size());
+ for (const auto& input : cnnLayer->insData) {
+ const auto layout = input.lock()->getLayout();
+ const auto format = CustomLayer::formatFromLayout(layout);
+ inputs.push_back(format);
+ }
+ return inputs;
+ }();
+
+ const auto cnnOutputs = [&] {
+ auto outputs = SmallVector<CustomDataFormat>{};
+ outputs.reserve(cnnLayer->outData.size());
+ for (const auto& output : cnnLayer->outData) {
+ const auto layout = output->getLayout();
+ const auto format = CustomLayer::formatFromLayout(layout);
+ outputs.push_back(format);
+ }
+ return outputs;
+ }();
+
+ const auto isSuitableLayer = [&env, &cnnLayer](const CustomLayer::Ptr& customLayer) {
+ env.log->trace("Check next custom layer : %v", customLayer->layerName());
VPU_LOGGER_SECTION(env.log);
- bool suitable = true;
- for (const auto& whereParam : customLayer->whereParams()) {
- const auto iter = layer->params.find(whereParam.first);
- if (iter == layer->params.end() || !cmp(iter->second, whereParam.second)) {
- suitable = false;
- break;
+ if (!customLayer->meetsWhereRestrictions(cnnLayer->params)) {
+ env.log->trace("Where restrictions are not met");
+ return false;
+ }
+
+ for (const auto& kernel : customLayer->kernels()) {
+ const auto& gws = kernel.globalGridSizeRules();
+ const auto& lws = kernel.localGridSizeRules();
+
+ const auto validSizeRule = [&](const std::string& rule) {
+ return CustomLayer::isLegalSizeRule(rule, cnnLayer->params);
+ };
+
+ const auto validGridSizes = std::all_of(begin(gws), end(gws), validSizeRule) &&
+ std::all_of(begin(lws), end(lws), validSizeRule);
+
+ if (!validGridSizes) {
+ env.log->trace("Work group grid sizes are not valid");
+ return false;
+ }
+ }
+
+ return true;
+ };
+
+ auto suitableCustomLayers = SmallVector<CustomLayer::Ptr>{};
+
+ std::copy_if(begin(customLayers), end(customLayers),
+ back_inserter(suitableCustomLayers), isSuitableLayer);
+
+ if (suitableCustomLayers.empty()) {
+ return nullptr;
+ }
+
+ const auto inputsLayoutMatch = [&](const SmallVector<CustomDataFormat>& cnnEdges,
+ const std::map<int, CustomDataFormat>& clEdges) {
+ for (const auto clEdge : clEdges) {
+ const auto port = clEdge.first;
+ VPU_THROW_UNLESS(port < cnnEdges.size(),
+ "Can't bind custom layer edge with port '%s' to CNNNetwork layer", port);
+
+ const auto clFormat = clEdge.second;
+ const auto cnnFormat = cnnEdges[port];
+ if (cnnFormat != clFormat &&
+ cnnFormat != CustomDataFormat::Any &&
+ clFormat != CustomDataFormat::Any) {
+ return false;
}
}
+ return true;
+ };
+
- if (suitable) {
- env.log->trace("Matches");
- return true;
+ for (const auto& customLayer : suitableCustomLayers) {
+ const auto clInputs = customLayer->inputs();
+
+ if (inputsLayoutMatch(cnnInputs, clInputs)) {
+ env.log->trace("Found suitable '%s' custom layer", customLayer->layerName());
+ return customLayer;
}
}
- return false;
+ const auto firstGoodLayer = suitableCustomLayers.front();
+ env.log->trace("Found suitable custom layer '%s', but input layouts "
+ "have not matched with what CNNNetwork expected",
+ firstGoodLayer->layerName());
+ return firstGoodLayer;
}
-} // namespace
void FrontEnd::parseLayer(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) {
parseLayer(model, layer, inputs, outputs,
void FrontEnd::parseLayer(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs,
const FrontEnd::UnsupportedLayerCallback& onUnsupported, const FrontEnd::SupportedLayerCallback& onSupported) {
- const auto customLayerByType = _customLayers.find(layer->type);
- const auto customLayerAsStage = _customLayers.find(layer->type + "@stage_0");
-
- const bool isCustomLayer =
- ((customLayerByType != _customLayers.end()) && hasSuitableCustom(customLayerByType->second, layer)) ||
- ((customLayerAsStage != _customLayers.end()) && hasSuitableCustom(customLayerAsStage->second, layer));
+ const auto customLayer = _customLayers.find(layer->type);
+ const bool isCustomLayer = customLayer != _customLayers.end() && getSuitableCustomLayer(customLayer->second, layer);
const auto& type = isCustomLayer ? "Custom" : layer->type;
if (parsers.count(type) == 0) {
ModelPtr FrontEnd::runCommonPasses(ie::ICNNNetwork& network, const UnsupportedLayerCallback& unsupportedLayer, const SupportedLayerCallback& supportedLayer) {
+    // NGraph -> CNN conversion may be called at 2 different moments: at
+    // the beginning, if conversion was forced by configuration, or after
+    // network batch detection and precision conversions. The conversion utility
+    // returns std::shared_ptr. ICNNNetwork is neither copyable nor movable.
+    // As a result, it is impossible to overwrite the given "network" argument.
+    // Do not use the network parameter in this function, to avoid using the
+    // wrong network reference (e.g. original instead of converted).
+ auto* originalOrConvertNetwork = &network;
+
const auto& env = CompileEnv::get();
//
// Create new VPU model
//
- const auto model = std::make_shared<ModelObj>(network.getName());
+ const auto model = std::make_shared<ModelObj>(originalOrConvertNetwork->getName());
model->attrs().set<int>("index", g_counter.fetch_add(1));
model->attrs().set<Resources>("resources", env.resources);
if (!env.config.ignoreIRStatistic) {
ie::ICNNNetworkStats* stats = nullptr;
// V10 IRs doesn't contain stats
- if (network.getStats(&stats, nullptr) == InferenceEngine::OK && !stats->isEmpty()) {
+ if (originalOrConvertNetwork->getStats(&stats, nullptr) == InferenceEngine::OK && !stats->isEmpty()) {
env.log->trace("Use node statistics from the IR");
model->setNodesStats(stats->getNodesStats());
}
// Update IE Network
//
+ std::shared_ptr<ie::ICNNNetwork> convertedNetwork;
+
{
env.log->trace("Update IE Network");
VPU_LOGGER_SECTION(env.log);
- IE_SUPPRESS_DEPRECATED_START
- // If we have NGraph network, but CNN compatibility is enabled, enforce conversion
- if (network.getFunction() && env.config.forceDeprecatedCnnConversion)
- network.addLayer(nullptr);
- IE_SUPPRESS_DEPRECATED_END
+ auto convertNetwork = [&convertedNetwork, &originalOrConvertNetwork]() {
+ auto nGraphFunc = originalOrConvertNetwork->getFunction();
+ // Disable shape inference (WA for generic operations)
+ ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
+
+ ngraph::pass::ConvertOpSet3ToOpSet2().run_on_function(nGraphFunc);
+ ngraph::pass::ConvertOpSet2ToOpSet1().run_on_function(nGraphFunc);
+ ngraph::pass::ConvertOpSet1ToLegacy().run_on_function(nGraphFunc);
+ convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *originalOrConvertNetwork);
+ originalOrConvertNetwork = convertedNetwork.get();
+ };
- detectNetworkBatch(network, model);
+ if (originalOrConvertNetwork->getFunction() && env.config.forceDeprecatedCnnConversion) {
+ convertNetwork();
+ }
+
+ detectNetworkBatch(*originalOrConvertNetwork, model);
- ie::NetPass::ConvertPrecision(network, ie::Precision::I64, ie::Precision::I32);
- ie::NetPass::ConvertPrecision(network, ie::Precision::U64, ie::Precision::I32);
- ie::NetPass::ConvertPrecision(network, ie::Precision::BOOL, ie::Precision::I32);
+ if (originalOrConvertNetwork->getFunction()) {
+ convertNetwork();
+ }
- IE_SUPPRESS_DEPRECATED_START
- // force conversion to CNNNetwork
- if (network.getFunction())
- network.addLayer(nullptr);
- IE_SUPPRESS_DEPRECATED_END
+ ie::NetPass::ConvertPrecision(*originalOrConvertNetwork, ie::Precision::I64, ie::Precision::I32);
+ ie::NetPass::ConvertPrecision(*originalOrConvertNetwork, ie::Precision::U64, ie::Precision::I32);
+ ie::NetPass::ConvertPrecision(*originalOrConvertNetwork, ie::Precision::BOOL, ie::Precision::I32);
- moveConstInputsToBlobs(network);
+ moveConstInputsToBlobs(*originalOrConvertNetwork);
- removeConstLayers(network);
+ removeConstLayers(*originalOrConvertNetwork);
- unrollLoops(network);
+ unrollLoops(*originalOrConvertNetwork);
}
//
// Parse IR Network
//
- _ieParsedNetwork = parseNetwork(network);
+ _ieParsedNetwork = parseNetwork(*originalOrConvertNetwork);
//
// Process internal VPU Model
parseInputAndOutputData(model);
- addDataTypeConvertStages(model);
+ if (!CompileEnv::get().config.disableConvertStages) {
+ addDataTypeConvertStages(model);
+ }
addPreProcessStages(model);
}
shapeLocation.dimsLocation = dataLocation.location;
shapeLocation.dimsOffset = dataLocation.offset;
+
+ if (data->usage() == DataUsage::Output) {
+ // We need to allocate memory for maximum dims values also
+ data->attrs().set<int>("ioDimsUpperBoundOffset", _blobMemOffset);
+ _blobMemOffset += dimsByteSize;
+ }
} else {
// Static allocation
shapeLocation.dimsLocation = Location::Blob;
}
void Allocator::freeData(const Data& data, DeallocationMode mode) {
- //
- // Release the chunk
- //
-
- if (const auto& parentDataToShapeEdge = data->parentDataToShapeEdge()) {
- auto const& parent = parentDataToShapeEdge->parent();
+ const auto getChunk = [this, &data](const Data& parent) {
+ VPU_THROW_UNLESS(_allocatedIntermData.count(parent) > 0,
+ "Allocator failed on freeData for {} with usage {}: parent data {} with usage {} is not allocated",
+ data->name(), data->usage(), parent->name(), parent->usage());
- auto it = _memChunksPerData.find(parentDataToShapeEdge->parent());
- auto chunk = it->second;
+ auto it = _memChunksPerData.find(parent);
VPU_INTERNAL_CHECK(it != _memChunksPerData.end(),
"Allocator failed on freeData for {} with usage {}: parent data {} with usage {} "
"containing shape for current data wasn't yet allocated",
data->name(), data->usage(), parent->name(), parent->usage());
+ auto chunk = it->second;
+
VPU_INTERNAL_CHECK(chunk != nullptr,
"Allocator failed on freeData for {} with usage {}: parent data {} with usage {} "
"containing shape for current data has no memory chunk",
"containing shape for this data has zero usages, but it is using at least by current data",
data->name(), data->usage(), parent->name(), parent->usage());
+ return chunk;
+ };
+
+ const auto decreaseChunkUsage = [this](allocator::MemChunk* chunk, const Data& parent) {
--chunk->inUse;
if (chunk->inUse == 0) {
_memChunksPerData.erase(parent);
_allocatedIntermData.erase(parent);
}
+ };
+
+ //
+ // Release the chunk
+ //
+
+ if (const auto& parentDataToShapeEdge = data->parentDataToShapeEdge()) {
+ auto const& parent = parentDataToShapeEdge->parent();
+
+ if (parent->usage() == DataUsage::Intermediate || parent->usage() == DataUsage::Temp) {
+ auto chunk = getChunk(parent);
+ decreaseChunkUsage(chunk, parent);
+ }
}
auto topParent = data->getTopParentData();
if (topParent->usage() == DataUsage::Intermediate ||
topParent->usage() == DataUsage::Temp) {
- IE_ASSERT(_allocatedIntermData.count(topParent) > 0);
-
- auto it = _memChunksPerData.find(topParent);
- IE_ASSERT(it != _memChunksPerData.end());
-
- auto chunk = it->second;
- IE_ASSERT(chunk != nullptr);
- IE_ASSERT(chunk->inUse > 0);
+ auto chunk = getChunk(topParent);
switch (mode) {
case DeallocationMode::JustFree: {
- --chunk->inUse;
-
- if (chunk->inUse == 0) {
- freeMem(chunk);
-
- _memChunksPerData.erase(topParent);
- _allocatedIntermData.erase(topParent);
- }
-
+ decreaseChunkUsage(chunk, topParent);
break;
}
// initial dump pass must be the first dump
ADD_DUMP_PASS("initial");
+ //
+ // Replace Global AvgPooling with ReduceMean
+ //
+
+ if (env.config.enableReplaceWithReduceMean) {
+ ADD_PASS(replaceWithReduceMean);
+ ADD_DUMP_PASS("replaceWithReduceMean");
+ }
+
+
if (!env.config.disableReorder && !env.config.hwOptimization) {
ADD_PASS(reorderInputsToChannelMinor);
ADD_DUMP_PASS("reorderInputsToChannelMinor");
ADD_DUMP_PASS("mergeParallelFC");
//
- // Replace Global AvgPooling with ReduceMean
- //
-
- if (env.config.enableReplaceWithReduceMean) {
- ADD_PASS(replaceWithReduceMean);
- ADD_DUMP_PASS("replaceWithReduceMean");
- }
-
- //
// Model common adaptation
//
ADD_PASS(hwPadding);
ADD_DUMP_PASS("hwPadding");
+ if (env.config.hwOptimization) {
+ ADD_PASS(splitLargeKernelConv);
+ ADD_DUMP_PASS("splitLargeKernelConv");
+ }
+
//
// Batch support
//
env.log->trace("Child data [%s] : mode [%v] offset [%v]", childData->name(), mode, offset);
- model->replaceParentData(childDataEdge, ddrCopy);
+ model->replaceDataToDataParent(childDataEdge, ddrCopy);
loopOverData(childData, [](const Data& subData) {
subData->setMemReqs(MemoryType::DDR);
class PassImpl final : public PerStagePass {
public:
- PassImpl() : PerStagePass({StageType::Concat}) {}
+ PassImpl() : PerStagePass({StageType::StubConcat}) {}
void runForStage(const Model& model, const Stage& stage) override;
};
//
if (dataEdge->connectionMode() == SharedConnectionMode::SINGLE_STAGE) {
- if (connectionStage->type() == StageType::Concat ||
+ if (connectionStage->type() == StageType::StubConcat ||
connectionStage->type() == StageType::Expand) {
IE_ASSERT(producer == child);
IE_ASSERT(consumer == parent);
continue;
}
- if (stage->type() == StageType::Concat) {
+ if (stage->type() == StageType::StubConcat) {
_processor.processConcat(model, stage);
} else if (stage->type() == StageType::Split) {
_processor.processSplit(model, stage);
continue;
}
- const auto nextStages = getExactNextStages(convolutionStage, {StageType::Power, StageType::Concat});
+ const auto nextStages = getExactNextStages(convolutionStage, {StageType::Power, StageType::StubConcat});
if (nextStages.size() != 2 || convolutionStage->type() != StageType::StubConv) {
continue;
}
continue;
}
- auto concatAfterPowerStage = getOneOfSingleNextStage(powerStage, {StageType::Concat});
+ auto concatAfterPowerStage = getOneOfSingleNextStage(powerStage, {StageType::StubConcat});
if (concatAfterPowerStage != concatStage) {
continue;
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/middleend/pass_manager.hpp>
+#include <vpu/utils/numeric.hpp>
+#include <vpu/model/data_contents/ie_blob_content.hpp>
+#include <precision_utils.h>
+
+#include <vector>
+#include <set>
+#include <memory>
+
+
+namespace vpu {
+
+namespace {
+
+class PassImpl final : public Pass {
+public:
+ explicit PassImpl(const StageBuilder::Ptr& stageBuilder) : _stageBuilder(stageBuilder) {}
+
+ void run(const Model& model) override;
+
+private:
+ StageBuilder::Ptr _stageBuilder;
+};
+
+void PassImpl::run(const Model& model) {
+ VPU_PROFILE(splitLargeKernelConv);
+
+ for (const auto& stage : model->getStages()) {
+ if (stage->type() != StageType::StubConv) {
+ continue;
+ }
+ const auto tryHW = stage->attrs().getOrDefault<bool>("tryHW", false);
+ if (!tryHW) {
+ continue;
+ }
+
+ IE_ASSERT(stage->numInputs() == 4);
+ IE_ASSERT(stage->numOutputs() == 1);
+
+ const auto input = stage->input(0);
+ const auto weights = stage->input(1);
+ const auto biases = stage->input(2);
+ const auto scales = stage->input(3);
+ const auto output = stage->output(0);
+
+ const auto kernelSizeX = stage->attrs().get<int>("kernelSizeX");
+ const auto kernelSizeY = stage->attrs().get<int>("kernelSizeY");
+ const auto groupSize = stage->attrs().get<int>("groupSize");
+ const auto inputC = input->desc().dim(Dim::C);
+ const auto outputC = output->desc().dim(Dim::C);
+
+        // Only 1x16 convolutions are supported now; support could be expanded up to 1x30
+ if (kernelSizeX != 16 || kernelSizeY != 1 || groupSize != 1) {
+ continue;
+ }
+
+ model->disconnectStage(stage);
+
+ int kernelGroupSize = kernelSizeX / 16 + 1;
+ IE_ASSERT(kernelGroupSize == 2);
+ const auto newKernelSizeX = kernelSizeX / kernelGroupSize;
+ IE_ASSERT(newKernelSizeX * kernelGroupSize == kernelSizeX);
+ const auto inGroupDimX = input->desc().dim(Dim::W) - newKernelSizeX;
+
+ DataVector subInputs(kernelGroupSize);
+ DataVector subOutputs(kernelGroupSize);
+
+ for (int groupInd = 0; groupInd < kernelGroupSize; ++groupInd) {
+ auto postfix = formatString("@subkernel=%d/%d", groupInd + 1, kernelGroupSize);
+
+            // subInput
+ auto subInputDesc = input->desc();
+ subInputDesc.setDim(Dim::W, inGroupDimX);
+
+ subInputs[groupInd] = model->duplicateData(
+ input,
+ postfix,
+ subInputDesc);
+
+            // subShrinkStage
+ DimValues offsets({{Dim::W, groupInd * newKernelSizeX}});
+
+ _stageBuilder->addCropStage(
+ model,
+ stage->name() + postfix,
+ stage->origLayer(),
+ input,
+ subInputs[groupInd],
+ std::move(offsets));
+
+ // subWeights
+ Data subWeights;
+ {
+ const auto content = weights->content();
+ IE_ASSERT(content != nullptr);
+
+ const auto origWeights = content->get<fp16_t>();
+ IE_ASSERT(origWeights != nullptr);
+
+ size_t newWeightsSize = newKernelSizeX * kernelSizeY * outputC * inputC;
+
+ auto newWeightsBlob = ie::make_shared_blob<fp16_t>(InferenceEngine::TensorDesc(
+ ie::Precision::FP16,
+ {newWeightsSize},
+ ie::Layout::C));
+ newWeightsBlob->allocate();
+
+ const auto newWeightsPtr = newWeightsBlob->buffer().as<fp16_t*>();
+ auto src = origWeights + groupInd * newKernelSizeX;
+ auto dst = newWeightsPtr;
+
+ for (int i = 0; i < kernelSizeY * inputC * outputC; ++i) {
+ std::copy_n(src + i * kernelSizeX, newKernelSizeX, dst + i * newKernelSizeX);
+ }
+
+ subWeights = model->duplicateData(
+ weights,
+ postfix,
+ DataDesc({newKernelSizeX, kernelSizeY, inputC, outputC}),
+ ieBlobContent(newWeightsBlob));
+ }
+
+ // subOutput
+ auto subOutputDesc = output->desc();
+
+ subOutputs[groupInd] = model->duplicateData(
+ output,
+ postfix,
+ subOutputDesc);
+
+ // subConvStage
+ auto subConvStage = model->duplicateStage(
+ stage,
+ postfix,
+ {subInputs[groupInd], subWeights, biases, scales},
+ {subOutputs[groupInd]});
+
+ subConvStage->attrs().set<int>("kernelSizeX", newKernelSizeX);
+ }
+
+ _stageBuilder->addSumStage(
+ model,
+ stage->name() + "@sum",
+ stage->origLayer(),
+ subOutputs[0],
+ subOutputs[1],
+ output);
+
+ model->removeStage(stage);
+ }
+}
+
+} // namespace
+
+Pass::Ptr PassManager::splitLargeKernelConv() {
+ return std::make_shared<PassImpl>(_stageBuilder);
+}
+
+} // namespace vpu
if (concatStage == nullptr)
continue;
- if (concatStage->type() != StageType::Concat)
+ if (concatStage->type() != StageType::StubConcat)
continue;
IE_ASSERT(concatStage->numInputs() > 0);
void DataNode::serializeBuffer(
BlobSerializer& serializer) {
- serializeDescImpl(serializer, _desc, this->strides());
+ serializeDescImpl(serializer, _desc, this->shapeLocation());
serializer.append(checked_cast<uint32_t>(_dataLocation.location));
+ const auto serializeIOParams = [&serializer](const Data& parent) {
+ auto IOIdx = parent->attrs().get<int>("ioIdx");
+ serializer.append(checked_cast<uint32_t>(IOIdx));
+
+ auto parentByteSize = parent->totalByteSize();
+ serializer.append(checked_cast<uint32_t>(parentByteSize));
+ };
+
if (_dataLocation.location == Location::Input || _dataLocation.location == Location::Output) {
- auto topParent = getTopParentData();
+ serializeIOParams(getTopParentData());
+ }
- auto ioIdx = topParent->attrs().get<int>("ioIdx");
- serializer.append(checked_cast<uint32_t>(ioIdx));
+ if (_shapeLocation.dimsLocation == Location::Output) {
+ serializeIOParams(parentDataToShapeEdge()->parent());
+ }
- auto parentByteSize = topParent->totalByteSize();
- serializer.append(checked_cast<uint32_t>(parentByteSize));
+ if (_shapeLocation.stridesLocation == Location::Output) {
+ serializeIOParams(parentDataToShapeEdge()->parent());
}
serializer.append(checked_cast<uint32_t>(_dataLocation.offset));
}
void DataNode::serializeIOInfo(BlobSerializer& serializer) const {
- auto ioIdx = attrs().get<int>("ioIdx");
- serializer.append(checked_cast<uint32_t>(ioIdx));
+ auto dataIOIdx = attrs().get<int>("ioIdx");
+ serializer.append(checked_cast<uint32_t>(dataIOIdx));
auto ioBufferOffset = attrs().get<int>("ioBufferOffset");
serializer.append(checked_cast<uint32_t>(ioBufferOffset));
serializer.append(uint8_t(0));
}
- serializeDescImpl(serializer, _desc, strides());
+ auto resShapeLocation = shapeLocation();
+ if (resShapeLocation.dimsLocation != Location::Blob) {
+ auto ioDimsUpperBoundOffset = attrs().get<int>("ioDimsUpperBoundOffset");
+ resShapeLocation.dimsLocation = Location::Blob;
+ resShapeLocation.dimsOffset = ioDimsUpperBoundOffset;
+ }
+ if (resShapeLocation.stridesLocation != Location::Blob) {
+ auto ioStridesUpperBoundOffset = attrs().get<int>("ioStridesUpperBoundOffset");
+ resShapeLocation.stridesLocation = Location::Blob;
+ resShapeLocation.stridesOffset = ioStridesUpperBoundOffset;
+ }
+
+ serializeDescImpl(serializer, _desc, resShapeLocation);
}
void DataNode::serializeDescImpl(
BlobSerializer& serializer,
const DataDesc& storedDesc,
- const DimValues& storedStrides) const {
+ const ShapeLocation& shapeLocation) const {
IE_ASSERT(storedDesc.numDims() <= MAX_DIMS_32);
auto storedDimsOrder = storedDesc.dimsOrder();
serializer.append(checked_cast<uint32_t>(storedPerm.size()));
- const auto& shape = shapeLocation();
-
- serializer.append(checked_cast<uint32_t>(shape.dimsLocation));
- serializer.append(checked_cast<uint32_t>(shape.dimsOffset));
- serializer.append(checked_cast<uint32_t>(shape.stridesLocation));
- serializer.append(checked_cast<uint32_t>(shape.stridesOffset));
+ serializer.append(checked_cast<uint32_t>(shapeLocation.dimsLocation));
+ serializer.append(checked_cast<uint32_t>(shapeLocation.dimsOffset));
+ serializer.append(checked_cast<uint32_t>(shapeLocation.stridesLocation));
+ serializer.append(checked_cast<uint32_t>(shapeLocation.stridesOffset));
}
void printTo(std::ostream& os, const Data& data) {
data->_dependentStagesEdges.push_back(edge);
- VPU_THROW_UNLESS(data->usage() == DataUsage::Intermediate,
- "Adding stage dependency for {} with type {} failed: only {} datas can be added as a dependency "
- "while adding {} with usage {} was attempted",
- stage->name(), stage->type(), DataUsage::Intermediate, data->name(), data->usage());
-
VPU_THROW_UNLESS(data->_producerEdge != nullptr,
"Adding stage dependency for {} with type {} failed: data {} with usage {} should have producer, "
"but actually it doesn't", stage->name(), stage->type(), data->name(), data->usage());
- if (data->_producerEdge != nullptr) {
- ++data->_producerEdge->_producer->_nextStages[stage];
- ++stage->_prevStages[data->_producerEdge->_producer];
- }
+ ++data->_producerEdge->_producer->_nextStages[stage];
+ ++stage->_prevStages[data->_producerEdge->_producer];
return edge;
}
// Check connection stage type and that parent has the largest buffer.
//
- if (connectionStage->type() == StageType::Concat ||
+ if (connectionStage->type() == StageType::StubConcat ||
connectionStage->type() == StageType::Expand) {
IE_ASSERT(producer == child);
IE_ASSERT(consumer == parent);
const auto& parentStage = parent->producer();
const auto& childStage = child->producer();
- if (parentStage && childStage && parentStage != childStage && parent->usage() == DataUsage::Intermediate) {
+ const auto& areStagesDifferent = [](const Stage& lhs, const Stage& rhs) {
+ return lhs && rhs && lhs != rhs;
+ };
+
+ if (areStagesDifferent(parentStage, childStage)) {
// Shape and data are produced from different stages, make sure that shape is calculated before data
addStageDependency(childStage, parent);
}
return edge;
}
-void ModelObj::replaceParentData(
+void ModelObj::replaceDataToShapeParent(
+ const DataToShapeAllocation& edge,
+ const Data& newParent) {
+ auto oldParent = edge->parent();
+ auto child = edge->child();
+
+ oldParent->_childDataToShapeEdges.erase(edge);
+ edge->_parent = newParent;
+ newParent->_childDataToShapeEdges.push_back(edge);
+}
+
+void ModelObj::replaceDataToShapeChild(
+ const DataToShapeAllocation& edge,
+ const Data& newChild) {
+ auto parent = edge->parent();
+ auto oldChild = edge->child();
+
+ oldChild->_parentDataToShapeEdge = nullptr;
+ edge->_child = newChild;
+
+ VPU_THROW_UNLESS(newChild->_parentDataToShapeEdge == nullptr,
+ "replaceDataToShapeChild failed: newChild {} with usage {} already has parent {} with usage {}",
+ newChild->name(), newChild->usage(), newChild->_parentDataToShapeEdge->parent()->name(), newChild->_parentDataToShapeEdge->parent()->usage());
+
+ newChild->_parentDataToShapeEdge = edge;
+}
+
+void ModelObj::replaceDataToDataParent(
const DataToDataAllocation& edge,
const Data& newParent) {
auto oldParent = edge->parent();
}
}
-void ModelObj::replaceChildData(
+void ModelObj::replaceDataToDataChild(
const DataToDataAllocation& edge,
const Data& newChild) {
auto parent = edge->parent();
VPU_CONFIG_KEY(ENABLE_REPLACE_WITH_REDUCE_MEAN),
VPU_CONFIG_KEY(ENABLE_TENSOR_ITERATOR_UNROLLING),
VPU_CONFIG_KEY(FORCE_PURE_TENSOR_ITERATOR),
+ VPU_CONFIG_KEY(DISABLE_CONVERT_STAGES),
//
// Debug options
setOption(_compileConfig.enableReplaceWithReduceMean, switches, config, VPU_CONFIG_KEY(ENABLE_REPLACE_WITH_REDUCE_MEAN));
setOption(_compileConfig.enableTensorIteratorUnrolling, switches, config, VPU_CONFIG_KEY(ENABLE_TENSOR_ITERATOR_UNROLLING));
setOption(_compileConfig.forcePureTensorIterator, switches, config, VPU_CONFIG_KEY(FORCE_PURE_TENSOR_ITERATOR));
+ setOption(_compileConfig.disableConvertStages, switches, config, VPU_CONFIG_KEY(DISABLE_CONVERT_STAGES));
setOption(_compileConfig.irWithVpuScalesDir, config, VPU_CONFIG_KEY(IR_WITH_SCALES_DIRECTORY));
setOption(_compileConfig.noneLayers, config, VPU_CONFIG_KEY(NONE_LAYERS), parseStringSet);
if (const auto envVar = std::getenv("IE_VPU_DUMP_ALL_PASSES")) {
_compileConfig.dumpAllPasses = std::stoi(envVar) != 0;
}
+ if (const auto envVar = std::getenv("IE_VPU_NUMBER_OF_SHAVES_AND_CMX_SLICES")) {
+ _compileConfig.numSHAVEs = _compileConfig.numCMXSlices = std::stoi(envVar);
+ }
#endif
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/frontend/frontend.hpp>
+
+#include <vpu/utils/numeric.hpp>
+
+#include <ngraph/opsets/opset3.hpp>
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace vpu {
+
+namespace {
+
+class BroadcastStage final : public StageNode {
+public:
+ using StageNode::StageNode;
+
+protected:
+ StagePtr cloneImpl() const override {
+ return std::make_shared<BroadcastStage>(*this);
+ }
+
+ void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+ const auto inputOrder = input(0)->desc().dimsOrder();
+ auto outputOrder = DimsOrder::fromNumDims(output(0)->desc().numDims());
+
+ if (inputOrder.numDims() >= 3 && inputOrder.dimInd(Dim::C) == 0) {
+ outputOrder.moveDim(Dim::C, 0);
+ }
+
+ orderInfo.setOutput(outputEdge(0), outputOrder);
+ }
+
+ void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+ stridesInfo.setInput(inputEdge(0), StridesRequirement().remove(0));
+ stridesInfo.setOutput(outputEdge(0), StridesRequirement().remove(0));
+ }
+
+ void finalizeDataLayoutImpl() override {
+ }
+
+ void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+ }
+
+ void initialCheckImpl() const override {
+ const auto mode = attrs().getOrDefault<BroadcastMode>("mode", BroadcastMode::NUMPY);
+ const auto& dataPrecision = input(0)->desc().type();
+
+ VPU_THROW_UNLESS(numOutputs() == 1,
+ "{} stage with name {} must have only 1 output, actually provided {} outputs",
+ type(), name(), numOutputs());
+ if (mode == BroadcastMode::NUMPY) {
+ VPU_THROW_UNLESS(numInputs() == 2,
+ "{} stage with name {} and numpy mode must have 2 inputs, actually "
+ "provided {} inputs", type(), name(), numInputs());
+ assertInputsOutputsTypes(this,
+ {{dataPrecision}, {DataType::S32}},
+ {{dataPrecision}});
+
+ } else {
+ VPU_THROW_UNLESS(numInputs() == 3,
+ "{} stage with name {} and explicit mode must have 3 inputs, actually "
+ "provided {} inputs", type(), name(), numInputs());
+ assertInputsOutputsTypes(this,
+ {{dataPrecision}, {DataType::S32}, {DataType::S32}},
+ {{dataPrecision}});
+ }
+ }
+
+ void serializeParamsImpl(BlobSerializer& serializer) const override {
+ const auto mode = attrs().getOrDefault<BroadcastMode>("mode", BroadcastMode::NUMPY);
+ serializer.append(static_cast<uint32_t>(mode == BroadcastMode::NUMPY ? 0 : 1));
+ }
+
+ void serializeDataImpl(BlobSerializer& serializer) const override {
+ const auto mode = attrs().getOrDefault<BroadcastMode>("mode", BroadcastMode::NUMPY);
+
+ input(0)->serializeBuffer(serializer);
+ input(1)->serializeBuffer(serializer);
+ if (mode == BroadcastMode::EXPLICIT) {
+ input(2)->serializeBuffer(serializer);
+ }
+ output(0)->serializeBuffer(serializer);
+ }
+};
+
+} // namespace
+
+void FrontEnd::parseBroadcast(
+ const Model& model,
+ const ie::CNNLayerPtr& layer,
+ const DataVector& inputs,
+ const DataVector& outputs) const {
+ VPU_THROW_UNLESS(layer != nullptr,
+ "parseBroadcast expects valid CNNLayerPtr, got nullptr");
+
+ VPU_THROW_UNLESS(outputs.size() == 1,
+ "{} layer with name {} must have only 1 output, actually provided {} outputs",
+ layer->type, layer->name, outputs.size());
+ const auto output = outputs[0];
+
+ const auto modeString = layer->GetParamAsString("mode", "numpy");
+ if (modeString == "numpy") {
+ VPU_THROW_UNLESS(inputs.size() == 2,
+ "{} layer with name {} and numpy mode must have 2 inputs, actually "
+ "provided {} inputs", layer->type, layer->name, inputs.size());
+ } else if (modeString == "explicit") {
+ VPU_THROW_UNLESS(inputs.size() == 3,
+ "{} layer with name {} and explicit mode must have 3 inputs, actually "
+ "provided {} inputs", layer->type, layer->name, inputs.size());
+ const auto axesMappingDesc = inputs[2]->desc();
+ const auto axesMappingPerm = axesMappingDesc.dimsOrder().toPermutation();
+ const auto axesMappingDim = axesMappingDesc.dim(axesMappingPerm.at(0));
+ VPU_THROW_UNLESS(axesMappingDesc.numDims() == 1,
+ "{} layer with name {} and explicit mode must have 1D axesMapping tensor, "
+ "actually provided {}D tensor",
+ layer->type, layer->name, axesMappingDesc.numDims());
+ VPU_THROW_UNLESS(axesMappingDim == output->desc().numDims(),
+ "{} layer with name {} and explicit mode must have axesMapping tensor with "
+ "size equals to number of output dims, expected [{}], provided [{}]",
+ layer->type, layer->name, output->desc().numDims(), axesMappingDim);
+
+ } else {
+ VPU_THROW_FORMAT("{} layer with name {}: Graph Transformer doesn't support {} mode",
+ layer->type, layer->name, modeString);
+ }
+
+ const auto shape = inputs[1];
+ const auto shapeDesc = inputs[1]->desc();
+ const auto shapeDim = shapeDesc.dim(shapeDesc.dimsOrder().toPermutation().at(0));
+ VPU_THROW_UNLESS(shapeDesc.numDims() == 1,
+ "{} layer with name {} and explicit mode must have 1D target shape tensor, "
+ "actually provided {}D tensor",
+ layer->type, layer->name, shapeDesc.numDims());
+ VPU_THROW_UNLESS(shapeDim == output->desc().numDims(),
+ "{} layer with name {} and explicit mode must have target shape tensor with "
+ "size equals to number of output dims, expected [{}], provided [{}]",
+ layer->type, layer->name, output->desc().numDims(), shapeDim);
+
+ const auto mode = modeString == "numpy" ? BroadcastMode::NUMPY : BroadcastMode::EXPLICIT;
+
+ auto stage = model->addNewStage<BroadcastStage>(
+ layer->name,
+ StageType::Broadcast,
+ layer,
+ inputs,
+ outputs);
+
+ stage->attrs().set("mode", mode);
+}
+
+} // namespace vpu
#include <vpu/frontend/frontend.hpp>
+#include <vpu/utils/numeric.hpp>
+
+#include <ngraph/opsets/opset3.hpp>
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+
#include <vector>
-#include <limits>
#include <string>
-#include <algorithm>
#include <memory>
-#include <set>
-#include <unordered_set>
#include <utility>
-#include <vpu/utils/numeric.hpp>
-
namespace vpu {
namespace {
-class ConcatStage final : public StageNode {
+using InputEdges = details::ContainerRange<StageInputVector, false>;
+
+DimsOrder getMostSuitableOrder(const InputEdges& inputEdges) {
+ DimsOrderMap<int> dimsOrderVotes;
+ for (const auto& inEdge : inputEdges) {
+ dimsOrderVotes[inEdge->input()->desc().dimsOrder()]++;
+ }
+
+ // Select DimsOrder with most votes.
+ // For equal votes : HCW > CHW > HWC.
+
+ DimsOrder finalOrder;
+ int curVotes = -1;
+ for (const auto& p : dimsOrderVotes) {
+ if (p.second > curVotes) {
+ finalOrder = p.first;
+ curVotes = p.second;
+ } else if (p.second == curVotes) {
+ if (p.first.numDims() >= 3) {
+ if (p.first.dimInd(Dim::C) == 2) {
+ finalOrder = p.first;
+ } else if (p.first.dimInd(Dim::C) == 3 &&
+ finalOrder.dimInd(Dim::C) != 2) {
+ finalOrder = p.first;
+ }
+ }
+ }
+ }
+
+ VPU_INTERNAL_CHECK(finalOrder.numDims() > 0,
+ "getMostSuitableOrder must find order with rank which is grater than 0, "
+ "actually rank is {}", finalOrder.numDims());
+ VPU_INTERNAL_CHECK(curVotes > 0,
+ "getMostSuitableOrder: final order must have at least 1 vote "
+ "actually votes number is {}", curVotes);
+
+ return finalOrder;
+}
+
+//
+// StubConcatStage will be replaced with Data <-> Data edges on special stage processor
+//
+
+class StubConcatStage final : public StageNode {
public:
using StageNode::StageNode;
protected:
StagePtr cloneImpl() const override {
- return std::make_shared<ConcatStage>(*this);
+ return std::make_shared<StubConcatStage>(*this);
}
void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
- auto output = outputEdge(0)->output();
-
- DimsOrderMap<int> dimsOrderVotes;
- for (const auto& inEdge : inputEdges()) {
- dimsOrderVotes[inEdge->input()->desc().dimsOrder()]++;
- }
-
- // Select DimsOrder with most votes.
- // For equal votes : HCW > CHW > HWC.
-
- DimsOrder finalOrder;
- int curVotes = -1;
- for (const auto& p : dimsOrderVotes) {
- if (p.second > curVotes) {
- finalOrder = p.first;
- curVotes = p.second;
- } else if (p.second == curVotes) {
- if (p.first.numDims() >= 3) {
- if (p.first.dimInd(Dim::C) == 2) {
- finalOrder = p.first;
- } else if (p.first.dimInd(Dim::C) == 3 &&
- finalOrder.dimInd(Dim::C) != 2) {
- finalOrder = p.first;
- }
- }
- }
- }
-
- IE_ASSERT(finalOrder.numDims() > 0);
- IE_ASSERT(curVotes > 0);
+ const auto finalOrder = getMostSuitableOrder(inputEdges());
for (const auto& inEdge : inputEdges()) {
orderInfo.setInput(inEdge, finalOrder);
}
}
- IE_ASSERT(minConcatDimInd < dimsOrder.numDims());
+ VPU_INTERNAL_CHECK(minConcatDimInd < dimsOrder.numDims(),
+ "{} stage with name {} must have minConcatDimInd no greater than number "
+ "of dimensions, actually index is {}, number of dimension is {}",
+ type(), name(), minConcatDimInd, dimsOrder.numDims());
//
// Initial StridesRequirement for inputs and output.
}
void initialCheckImpl() const override {
- IE_ASSERT(numInputs() > 0);
- IE_ASSERT(numOutputs() == 1);
+ VPU_INTERNAL_CHECK(numInputs() > 0,
+ "{} stage with name {} must have no less than 1 input, "
+ "actually provided {} inputs", type(), name(), numInputs());
+ VPU_INTERNAL_CHECK(numOutputs() == 1,
+ "{} stage with name {} must have only 1 output, "
+ "actually provided {} outputs", type(), name(), numOutputs());
const auto& firstInputPrecision = input(0)->desc().type();
assertAllInputsOutputsTypes(this, {firstInputPrecision}, {firstInputPrecision});
}
void serializeParamsImpl(BlobSerializer&) const override {
- VPU_THROW_EXCEPTION << "Must never be called";
+ VPU_THROW_FORMAT("{} stage with name {} must never call serializeParamsImpl",
+ type(), name());
}
void serializeDataImpl(BlobSerializer&) const override {
- VPU_THROW_EXCEPTION << "Must never be called";
+ VPU_THROW_FORMAT("{} stage with name {} must never call serializeDataImpl",
+ type(), name());
+ }
+};
+
+//
+// ConcatStage will be inferred on device side
+//
+
+class ConcatStage final : public StageNode {
+public:
+ using StageNode::StageNode;
+
+protected:
+ StagePtr cloneImpl() const override {
+ return std::make_shared<ConcatStage>(*this);
+ }
+
+ void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+ const auto finalOrder = getMostSuitableOrder(inputEdges());
+
+ for (const auto& inEdge : inputEdges()) {
+ orderInfo.setInput(inEdge, finalOrder);
+ }
+
+ orderInfo.setOutput(outputEdge(0), finalOrder);
+ }
+
+ void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+ for (const auto& inEdge : inputEdges()) {
+ stridesInfo.setInput(inEdge, StridesRequirement().remove(0));
+ }
+ stridesInfo.setOutput(outputEdge(0), StridesRequirement().remove(0));
+ }
+
+ void finalizeDataLayoutImpl() override {
+ }
+
+ void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+ }
+
+ void initialCheckImpl() const override {
+ VPU_INTERNAL_CHECK(numInputs() > 0,
+ "{} stage with name {} must have no less than 1 input, "
+ "actually provided {} inputs", type(), name(), numInputs());
+ VPU_INTERNAL_CHECK(numOutputs() == 1,
+ "{} stage with name {} must have only 1 output, "
+ "actually provided {} outputs", type(), name(), numOutputs());
+
+ const auto& firstInputPrecision = input(0)->desc().type();
+ assertAllInputsOutputsTypes(this, {firstInputPrecision}, {firstInputPrecision});
+ }
+
+ void serializeParamsImpl(BlobSerializer& serializer) const override {
+ const auto axis = attrs().get<Dim>("axis");
+ const auto axisInd = input(0)->desc().dimsOrder().dimInd(axis);
+
+ serializer.append(static_cast<uint32_t>(axisInd));
+ serializer.append(static_cast<uint32_t>(numInputs()));
+ }
+
+ void serializeDataImpl(BlobSerializer& serializer) const override {
+ for (const auto& input : inputs()) {
+ input->serializeBuffer(serializer);
+ }
+ output(0)->serializeBuffer(serializer);
}
};
} // namespace
-void FrontEnd::parseConcat(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const {
- IE_ASSERT(!inputs.empty());
- IE_ASSERT(outputs.size() == 1);
+void FrontEnd::parseConcat(
+ const Model& model,
+ const ie::CNNLayerPtr& layer,
+ const DataVector& inputs,
+ const DataVector& outputs) const {
+ VPU_THROW_UNLESS(!inputs.empty(),
+ "{} layer with name {} must have no less than 1 input, "
+ "actually provided 0 inputs", layer->type, layer->name);
+ VPU_THROW_UNLESS(outputs.size() == 1,
+ "{} layer with name {} must have only 1 output, actually provided {} outputs",
+ layer->type, layer->name, outputs.size());
auto output = outputs[0];
auto concat = std::dynamic_pointer_cast<ie::ConcatLayer>(layer);
- IE_ASSERT(layer != nullptr);
+ VPU_THROW_UNLESS(layer != nullptr,
+ "{} layer with name {} must be able to convert to ie::ConcatLayer",
+ layer->type, layer->name);
- IE_ASSERT(concat->_axis < output->desc().numDims());
+ VPU_THROW_UNLESS(concat->_axis < output->desc().numDims(),
+ "{} layer with name {} must have axis attribute no grater than number of "
+ "dimensions, actually provided axis = {}, numDims = {}",
+ layer->type, layer->name, concat->_axis, output->desc().numDims());
auto perm = DimsOrder::fromNumDims(output->desc().numDims()).toPermutation();
auto axis = perm[output->desc().numDims() - 1 - concat->_axis];
- _stageBuilder->addConcatStage(model, concat->name, concat, axis, inputs, output);
+ // If there is DSR as concat's output in the transformed graph, then we need to infer
+ // concat on the device side. In other cases StubConcat stage will be added and it will
+    // be replaced with Data <-> Data edges.
+ auto inferRequirement = ConcatInferRequirement::CanBeReplaced;
+ if (auto concatOp = std::dynamic_pointer_cast<ngraph::opset3::Concat>(layer->getNode())) {
+ inferRequirement = concatOp->get_output_as_single_output_node(0)->get_type_info() ==
+ ngraph::vpu::op::DynamicShapeResolver::type_info
+ ? ConcatInferRequirement::NeedToInfer
+ : ConcatInferRequirement::CanBeReplaced;
+ }
+
+ _stageBuilder->addConcatStage(model, concat->name, concat, axis, inputs, output, inferRequirement);
}
Stage StageBuilder::addConcatStage(
const ie::CNNLayerPtr& layer,
Dim axis,
const DataVector& inputs,
- const Data& output) {
+ const Data& output,
+ ConcatInferRequirement inferRequirement) {
std::vector<DimValues> offsets;
offsets.reserve(inputs.size());
- DimValues curOffset({{axis, 0}});
- for (const auto& input : inputs) {
- offsets.emplace_back(curOffset);
- curOffset.set(axis, curOffset[axis] + input->desc().dim(axis));
- }
+ Stage stage;
+ if (inferRequirement == ConcatInferRequirement::NeedToInfer) {
+ stage = model->addNewStage<ConcatStage>(
+ layer->name,
+ StageType::Concat,
+ layer,
+ inputs,
+ {output});
+ } else {
+ DimValues curOffset({{axis, 0}});
+ for (const auto& input : inputs) {
+ offsets.emplace_back(curOffset);
+ curOffset.set(axis, curOffset[axis] + input->desc().dim(axis));
+ }
- auto stage = addConcatStage(model, name, layer, std::move(offsets), inputs, output);
+ stage = addConcatStage(model, name, layer, std::move(offsets), inputs, output);
+ }
stage->attrs().set("axis", axis);
std::vector<DimValues>&& offsets,
const DataVector& inputs,
const Data& output) {
- IE_ASSERT(offsets.size() == inputs.size());
+ VPU_INTERNAL_CHECK(offsets.size() == inputs.size(),
+ "offsets count (provided {}) must be equal to inputs count (provided {}) to "
+ "create Concat stage with name {}", offsets.size(), inputs.size(), name);
- auto stage = model->addNewStage<ConcatStage>(
+ auto stage = model->addNewStage<StubConcatStage>(
name,
- StageType::Concat,
+ StageType::StubConcat,
layer,
inputs,
{output});
tryHW = false;
}
- if (kernelSizeX > 15 || kernelSizeY > 15 || kernelStrideX > 8) {
+ // 1x16 convolution is split into two 1x8 convolutions in splitLargeKernelConv pass
+ const bool KernelSizeCantBeSplit = !(kernelSizeX == 16 && kernelSizeY == 1);
+ const bool KernelSizeTooLarge = (kernelSizeX > 15 || kernelSizeY > 15);
+ if (KernelSizeTooLarge && KernelSizeCantBeSplit) {
+ tryHW = false;
+ }
+
+ if (kernelStrideX > 8) {
tryHW = false;
}
int groupSize = convLayer->_group;
+ // kernelStrideY doesn't matter when kernelSizeY==InputSizeY, change it to try HW in 1D case
+ if (kernelSizeY == input->desc().dim(Dim::H) + padTop + padBottom)
+ kernelStrideY = kernelStrideX;
+
//
// Check if HW is applicable
//
#include <memory>
#include <string>
#include <map>
-#include <unordered_set>
#include <utility>
#include <algorithm>
#include <tuple>
namespace vpu {
-static void calcSizesFromParams(const DataDesc &desc, const SmallVector<std::string> &bufferSizeRules, SmallVector<int, 3> &sizes);
+static SmallVector<int> calcSizesFromParams(const DataDesc& desc, const SmallVector<std::string>& bufferSizeRules,
+ std::map<std::string, std::string> layerParams);
namespace {
}
void serializeParamsImpl(BlobSerializer& serializer) const override {
- const auto& customLayer = attrs().get<CustomLayer::Ptr>("customLayer");
- const auto& gws = attrs().get<SmallVector<int, 3>>("gws");
- const auto& lws = attrs().get<SmallVector<int, 3>>("lws");
+ const auto& kernel = attrs().get<CustomKernel>("customKernel");
+ const auto& gws = attrs().get<SmallVector<int>>("gws");
+ const auto& lws = attrs().get<SmallVector<int>>("lws");
const auto& ports = attrs().get<std::map<std::string, int>>("ports");
-
- //
- // GWG, LWG, Offs
- //
+ const auto& localDataSizes = attrs().get<std::map<std::string, int>>("localDataSizes");
for (int i = 0; i < gws.size(); ++i) {
- serializer.append(static_cast<uint32_t>(gws[i]/lws[i]));
+ serializer.append(static_cast<uint32_t>(gws[i] / lws[i]));
}
for (auto x : lws) {
serializer.append(static_cast<uint32_t>(0));
}
- serializer.append(static_cast<uint32_t>(customLayer->maxShaves()));
-
- //
- // Kernel Id
- //
-
- serializer.append(static_cast<uint32_t>(customLayer->kernelId()));
-
- //
- // Number of inputs
- //
-
- IE_ASSERT(customLayer->stageNumInputs() >= 0);
- serializer.append(static_cast<uint32_t>(customLayer->stageNumInputs()));
-
- //
- // Total number of blobs
- //
-
+ serializer.append(static_cast<uint32_t>(kernel.maxShaves()));
+ serializer.append(static_cast<uint32_t>(kernel.kernelId()));
+ serializer.append(static_cast<uint32_t>(kernel.inputDataCount()));
serializer.append(static_cast<int32_t>(numInputs() + numOutputs()));
+ serializer.append(static_cast<uint32_t>(kernel.parameters().size()));
- //
- // Number of kernel parameters
- //
-
- serializer.append(static_cast<uint32_t>(customLayer->parameters().size()));
-
- //
- // Parameters & relocation info
- //
-
- std::map<std::string, CustomLayer::KernelParam> b2b;
- for (const auto& kp : customLayer->bindings()) {
+ std::map<std::string, CustomKernel::KernelParam> b2b;
+ for (const auto& kp : kernel.bindings()) {
b2b[kp.argName] = kp;
}
IE_ASSERT(origLayer() != nullptr);
- for (const auto& kp : customLayer->parameters()) {
+ for (const auto& kp : kernel.parameters()) {
const auto& parameter = b2b[kp];
switch (parameter.type) {
- case CustomParamType::Input:
- case CustomParamType::Output:
- case CustomParamType::InputBuffer:
- case CustomParamType::OutputBuffer:
- case CustomParamType::Data:
- {
- if (ports.find(kp) == ports.end()) {
- VPU_THROW_EXCEPTION
- << "Unable to bind parameter " << parameter.argName << " for "
- << origLayer()->type <<" layer. Name is: " << origLayer()->name;
- }
- int id = ports.find(kp)->second;
- serializer.append(static_cast<uint32_t>(0));
- serializer.append(static_cast<uint32_t>(id));
+ case CustomParamType::Input:
+ case CustomParamType::Output:
+ case CustomParamType::InputBuffer:
+ case CustomParamType::OutputBuffer:
+ case CustomParamType::Data: {
+ VPU_THROW_UNLESS(ports.find(kp) != ports.end(),
+ "XML specification for %s layer has no definition for %s parameter. Layer name: %s",
+ origLayer()->type, kp, origLayer()->name);
+
+ int id = ports.find(kp)->second;
+ serializer.append(static_cast<uint32_t>(0));
+ serializer.append(static_cast<uint32_t>(id));
+ break;
+ }
+ case CustomParamType::Int:
+ case CustomParamType::Float: {
+ const auto cnnParam = origLayer()->params.find(parameter.irSource);
+ if (cnnParam != origLayer()->params.end()) {
+ const auto param = [&]() -> std::string {
+ if (parameter.portIndex < 0) {
+ return cnnParam->second;
+ }
- break;
- }
- case CustomParamType::Int:
- case CustomParamType::Float:
- {
- if (origLayer()->params.find(parameter.irSource) != origLayer()->params.end()) {
- std::stringstream parameterStream(origLayer()->params[parameter.irSource]);
- std::string param;
+ VPU_THROW_UNLESS(cnnParam->second.find(',') != std::string::npos,
+ "Error while parsing CNNetwork parameter '%s' for '%s' layer: port-index=%d is set, "
+ "but parameter is neither a tensor, nor an array type.",
+ cnnParam->first, origLayer()->type, parameter.portIndex);
+
+ std::string value;
+ std::stringstream parameterStream{cnnParam->second};
for (int i = 0; i <= parameter.portIndex; i++) {
- getline(parameterStream, param, ',');
+ getline(parameterStream, value, ',');
}
+ return value;
+ }();
- if (parameter.type == CustomParamType::Int) {
- serializer.append(static_cast<int32_t>(std::stoi(param)));
- serializer.append(static_cast<int32_t>(-1));
- } else {
- serializer.append(static_cast<float>(std::stof(param) ));
- serializer.append(static_cast<int32_t>(-2));
- }
- break;
+ if (parameter.type == CustomParamType::Int) {
+ serializer.append(static_cast<int32_t>(std::stoi(param)));
+ serializer.append(static_cast<int32_t>(-1));
} else {
- auto pos = parameter.irSource.find_first_of('.');
- if (pos != std::string::npos) {
- auto blob = parameter.irSource.substr(0, pos);
- auto dim = parameter.irSource.substr(pos + 1, std::string::npos);
-
- IE_ASSERT(dim.length() == 1)
- << "Unable to deduce parameter " << parameter.argName << " for "
- << origLayer()->type <<" layer. Name is: " << origLayer()->name;
- char dimLetter = dim[0];
-
- ie::DataPtr origData;
- if (blob == "I") {
- origData = origLayer()->insData[parameter.portIndex].lock();
- } else {
- origData = origLayer()->outData[parameter.portIndex];
- }
- IE_ASSERT(origData != nullptr);
+ serializer.append(static_cast<float>(std::stof(param)));
+ serializer.append(static_cast<int32_t>(-2));
+ }
+ break;
+ } else {
+ auto pos = parameter.irSource.find_first_of('.');
+ if (pos != std::string::npos) {
+ auto blob = parameter.irSource.substr(0, pos);
+ auto dim = parameter.irSource.substr(pos + 1, std::string::npos);
- auto dims = origData->getDims();
- int ndims = dims.size();
+ VPU_THROW_UNLESS(dim.length() == 1,
+ "Unable to deduce parameter '%s' for '%s' layer. Name is: '%s'",
+ parameter.argName, origLayer()->type, origLayer()->name);
- if (ndims > 4)
- VPU_THROW_EXCEPTION
- << "Unable to deduce parameter " << parameter.argName << " for "
- << origLayer()->type <<" layer. Name is: " << origLayer()->name;
+ char dimLetter = dim[0];
- const std::map<char, int> vars = {
- { 'b', 0 }, { 'B', 0 },
- { 'f', 1 }, { 'F', 1 },
- { 'y', 2 }, { 'Y', 2 },
- { 'x', 3 }, { 'X', 3 },
- };
+ ie::DataPtr origData;
+ if (blob == "I") {
+ origData = origLayer()->insData[parameter.portIndex].lock();
+ } else {
+ origData = origLayer()->outData[parameter.portIndex];
+ }
+ IE_ASSERT(origData != nullptr);
- auto var = vars.find(dimLetter);
- if (var != vars.end()) {
- auto res = dims.at(var->second-4+ndims);
+ auto dims = origData->getDims();
+ int ndims = dims.size();
+
+ if (ndims > 4) {
+ VPU_THROW_UNLESS(dim.length() == 1,
+ "Unable to deduce parameter '%s' for '%s' layer. Name is: '%s'",
+ parameter.argName, origLayer()->type, origLayer()->name);
+ }
+ const std::map<char, int> vars = {
+ {'b', 0}, {'B', 0},
+ {'f', 1}, {'F', 1},
+ {'y', 2}, {'Y', 2},
+ {'x', 3}, {'X', 3},
+ };
+
+ auto var = vars.find(dimLetter);
+ if (var != vars.end()) {
+ auto res = dims.at(var->second - 4 + ndims);
+
+ serializer.append(static_cast<uint32_t>(res));
+ serializer.append(static_cast<int32_t>(-1));
+ } else {
+ VPU_THROW_FORMAT("Unable to deduce parameter '%s' for '%s' layer. Name is: '%s'",
+ parameter.argName, origLayer()->type, origLayer()->name);
+ }
- serializer.append(static_cast<uint32_t>(res));
+ break;
+ } else {
+ VPU_THROW_UNLESS(parameter.portIndex < 0,
+ "Unable to deduce parameter '%s' for '%s' layer: port-index=%d is set, "
+ "but parameter is neither a tensor, nor an array type.",
+ parameter.argName, origLayer()->type, parameter.portIndex);
+ try {
+ if (parameter.type == CustomParamType::Int) {
+ serializer.append(static_cast<int32_t>(std::stoi(parameter.irSource)));
serializer.append(static_cast<int32_t>(-1));
} else {
- VPU_THROW_EXCEPTION
- << "Unable to deduce parameter " << parameter.argName << " for "
- << origLayer()->type <<" layer. Name is: " << origLayer()->name;
+ serializer.append(static_cast<float>(std::stof(parameter.irSource)));
+ serializer.append(static_cast<int32_t>(-2));
}
-
break;
- } else {
- try {
- if (parameter.type == CustomParamType::Int) {
- serializer.append(static_cast<int32_t>(std::stoi(parameter.irSource)));
- serializer.append(static_cast<int32_t>(-1));
- } else {
- serializer.append(static_cast<float>(std::stof(parameter.irSource) ));
- serializer.append(static_cast<int32_t>(-2));
- }
- break;
- }
- catch (const std::invalid_argument&) {
- VPU_THROW_EXCEPTION
- << "Unable to deduce parameter " << parameter.argName << " for "
- << origLayer()->type <<" layer. Name is: " << origLayer()->name
- <<", parameter is: " << parameter.irSource;
- }
+ } catch (const std::invalid_argument&) {
+ VPU_THROW_FORMAT("Unable to deduce parameter '%s' for '%s' layer. "
+ "Name is: '%s', parameter is: '%s'",
+ parameter.argName, origLayer()->type, origLayer()->name, parameter.irSource);
}
}
}
- case CustomParamType::LocalData:
- {
- ie::DataPtr origData;
- if (parameter.dimSource == CustomDimSource::Input) {
- origData = origLayer()->insData[parameter.dimIdx].lock();
- } else {
- origData = origLayer()->outData[parameter.dimIdx];
- }
- IE_ASSERT(origData != nullptr);
-
- SmallVector<int, 3> sizes;
- calcSizesFromParams(DataDesc(origData->getTensorDesc()), parameter.bufferSizeRules, sizes);
-
- serializer.append(static_cast<int32_t>(sizes[0] * sizes[1] * sizes[2]));
- serializer.append(static_cast<int32_t>(-3));
+ }
+ case CustomParamType::LocalData: {
+ const auto size = localDataSizes.at(parameter.argName);
+ serializer.append(static_cast<int32_t>(size));
+ serializer.append(static_cast<int32_t>(-3));
- break;
- }
- default:
- VPU_THROW_EXCEPTION
- << "Unable to deduce parameter " << parameter.argName << " for "
- << origLayer()->type <<" layer. Name is: " << origLayer()->name;
+ break;
+ }
+ default:
+ VPU_THROW_FORMAT("Unable to deduce parameter '%s' for '%s' layer. Name is: '%s'",
+ parameter.argName, origLayer()->type, origLayer()->name);
}
}
}
} // namespace
-static void calcSizesFromParams(const DataDesc &desc, const SmallVector<std::string> &bufferSizeRules, SmallVector<int, 3> &sizes) {
- // assume output tensor is dimension source by default
- auto batchDim = desc.dim(Dim::N, 1);
- auto featureDim = desc.dim(Dim::C, 1);
- auto yDim = desc.dim(Dim::H, 1);
- auto xDim = desc.dim(Dim::W, 1);
-
- const std::map<char, int> vars = {
- { 'b', batchDim }, { 'B', batchDim },
- { 'f', featureDim }, { 'F', featureDim },
- { 'y', yDim }, { 'Y', yDim },
- { 'x', xDim }, { 'X', xDim },
- };
+static SmallVector<int> calcSizesFromParams(const DataDesc& desc, const SmallVector<std::string>& bufferSizeRules,
+ std::map<std::string, std::string> layerParams) {
+ {
+ const auto B = std::to_string(desc.dim(Dim::N, 1));
+ const auto F = std::to_string(desc.dim(Dim::C, 1));
+ const auto Y = std::to_string(desc.dim(Dim::H, 1));
+ const auto X = std::to_string(desc.dim(Dim::W, 1));
+
+ auto sizes = std::vector<std::pair<std::string, std::string>> {
+ {"b", B}, {"B", B},
+ {"f", F}, {"F", F},
+ {"y", Y}, {"Y", Y},
+ {"x", X}, {"X", X},
+ };
- sizes.reserve(std::max<size_t>(bufferSizeRules.size(), 3));
- for (const auto& rule : bufferSizeRules) {
- SimpleMathExpression expr;
- expr.setVariables(vars);
- expr.parse(rule);
- sizes.emplace_back(expr.evaluate());
- }
- while (sizes.size() < 3) {
- sizes.emplace_back(1);
+ std::move(begin(sizes), end(sizes), inserter(layerParams, end(layerParams)));
}
-}
-static CustomLayer::Ptr chooseSuitable(const std::vector<CustomLayer::Ptr>& customLayers,
- const std::map<std::string, std::string>& layerParams) {
- ie::details::CaselessEq<std::string> cmp;
+ MathExpression expr;
+ expr.setVariables(layerParams);
+ const auto parseSizeRule = [&expr](const std::string& rule) {
+ expr.parse(rule);
+ return expr.evaluate();
+ };
- for (const auto& customLayer : customLayers) {
- bool suitable = true;
- for (const auto& whereParam : customLayer->whereParams()) {
- if (layerParams.find(whereParam.first) == layerParams.end() ||
- !cmp(layerParams.find(whereParam.first)->second, whereParam.second)) {
- suitable = false;
- }
- }
- if (suitable) {
- return customLayer;
- }
- }
+ auto sizes = SmallVector<int>{};
+ sizes.reserve(bufferSizeRules.size());
+ std::transform(begin(bufferSizeRules), end(bufferSizeRules), std::back_inserter(sizes), parseSizeRule);
- IE_ASSERT(false);
- return CustomLayer::Ptr(nullptr);
+ return sizes;
}
void FrontEnd::parseCustom(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) {
IE_ASSERT(layer != nullptr);
IE_ASSERT(outputs.size() == 1);
- std::vector<CustomLayer::Ptr> customLayersForType;
- if (_customLayers.count(layer->type) > 0) {
- customLayersForType.push_back(chooseSuitable(_customLayers.find(layer->type)->second, layer->params));
- } else if (_customLayers.count(layer->type + "@stage_0") > 0) {
- int stageNum = 0;
- while (_customLayers.count(layer->type + "@stage_" + std::to_string(stageNum)) > 0) {
- customLayersForType.push_back(chooseSuitable(_customLayers.find(layer->type + "@stage_" + std::to_string(stageNum))->second,
- layer->params));
- stageNum++;
- }
- } else {
- IE_ASSERT(false);
- }
+ const auto suitableLayer = [&] {
+ const auto customLayersForType = _customLayers.find(layer->type);
+ IE_ASSERT(customLayersForType != _customLayers.end());
+ return getSuitableCustomLayer(customLayersForType->second, layer);
+ }();
+ IE_ASSERT(suitableLayer);
+ const auto kernels = suitableLayer->kernels();
// Get all buffers, buffers must be unique associated by port index
std::map<int, Data> tempBuffsMap;
- for (size_t stageNum = 0; stageNum < customLayersForType.size(); stageNum++) {
- for (auto& param : customLayersForType[stageNum]->bindings()) {
+ for (const auto& kernel : kernels) {
+ for (const auto& param : kernel.bindings()) {
if (param.type == CustomParamType::InputBuffer || param.type == CustomParamType::OutputBuffer) {
- SmallVector<int, 3> sizes;
- auto desc = (param.dimSource == CustomDimSource::Input) ? inputs[param.dimIdx]->desc() : outputs[param.dimIdx]->desc();
- calcSizesFromParams(desc, param.bufferSizeRules, sizes);
- auto buf = model->addNewData("custom_" + layer->type + "_buf", DataDesc({sizes[0], sizes[1], sizes[2], 1}));
+ const auto desc = (param.dimSource == CustomDimSource::Input) ? inputs[param.dimIdx]->desc()
+ : outputs[param.dimIdx]->desc();
+ const auto sizes = calcSizesFromParams(desc, { param.bufferSizeRule }, layer->params);
+ const auto buf = model->addNewData("custom_" + layer->type + "_buf", DataDesc({sizes[0], 1, 1, 1}));
if (tempBuffsMap.find(param.portIndex) == tempBuffsMap.end()) {
tempBuffsMap[param.portIndex] = buf;
}
}
// Gather inputs and outputs for each stage for the layer
- for (int stage_num = 0; stage_num < customLayersForType.size(); stage_num++) {
- auto customLayer = customLayersForType[stage_num];
+ for (int stage_num = 0; stage_num < kernels.size(); stage_num++) {
+ const auto& kernel = kernels[stage_num];
std::map<std::string, int> ports;
std::vector<CustomDataFormat> formats;
// Gather inputs
DataVector stageInputs;
- for (auto& param : customLayer->bindings()) {
+ for (auto& param : kernel.bindings()) {
if (param.type == CustomParamType::Input) {
ports[param.argName] = stageInputs.size();
formats.emplace_back(param.format);
}
// Gather data blobs
- for (auto& param : customLayer->bindings()) {
+ for (auto& param : kernel.bindings()) {
if (param.type == CustomParamType::Data) {
auto blobIterator = layer->blobs.find(param.irSource);
if (blobIterator != layer->blobs.end()) {
}
}
- customLayer->setStageNumInputs(stageInputs.size());
formats.emplace_back(CustomDataFormat::Any);
// Get kernel binary
- auto kernelNode = _kernelNodes.find(customLayer->kernelBinary());
+ auto kernelNode = _kernelNodes.find(kernel.kernelBinary());
if (kernelNode != _kernelNodes.end()) {
stageInputs.emplace_back((kernelNode->second));
} else {
- auto kernelBinaryDesc = DataDesc({customLayer->kernelBinary().length()});
+ auto kernelBinaryDesc = DataDesc({kernel.kernelBinary().length()});
kernelBinaryDesc.setType(DataType::U8);
auto kernelBinary = model->addConstData(
layer->type + "@kernelBinary",
kernelBinaryDesc,
- std::make_shared<KernelBinaryContent>(customLayer->kernelBinary()));
+ std::make_shared<KernelBinaryContent>(kernel.kernelBinary()));
stageInputs.emplace_back((kernelBinary));
- _kernelNodes[customLayer->kernelBinary()] = kernelBinary;
+ _kernelNodes[kernel.kernelBinary()] = kernelBinary;
}
DataVector stageOutputs;
- for (auto& param : customLayer->bindings()) {
+ for (auto& param : kernel.bindings()) {
if (param.type == CustomParamType::Output) {
ports[param.argName] = stageInputs.size() + stageOutputs.size();
stageOutputs.emplace_back(outputs[param.portIndex]);
}
auto stage = model->addNewStage<CustomStage>(
- layer->name + ((customLayersForType.size() == 1) ? "" : "@stage_" + std::to_string(stage_num)),
+ layer->name + ((kernels.size() == 1) ? "" : "@stage_" + std::to_string(stage_num)),
StageType::Custom,
layer,
stageInputs,
stageOutputs);
- stage->attrs().set("customLayer", customLayer);
+ stage->attrs().set("customKernel", suitableLayer->kernels()[stage_num]);
stage->attrs().set("ports", ports);
stage->attrs().set("formats", formats);
- SmallVector<int, 3> gws;
- SmallVector<int, 3> lws;
- auto dimSource = (customLayer->dimSource() == CustomDimSource::Input) ? inputs : outputs;
- calcSizesFromParams(dimSource[customLayer->dimSourceIndex()]->desc(), customLayer->globalSizeRules(), gws);
- calcSizesFromParams(dimSource[customLayer->dimSourceIndex()]->desc(), customLayer->localSizeRules(), lws);
+ const auto& dimSource = (kernel.dimSource() == CustomDimSource::Input) ? inputs : outputs;
+ const auto& dataDesc = dimSource[kernel.dimSourceIndex()]->desc();
+
+ const auto gws = calcSizesFromParams(dataDesc, kernel.globalGridSizeRules(), layer->params);
+ const auto lws = calcSizesFromParams(dataDesc, kernel.localGridSizeRules(), layer->params);
stage->attrs().set("gws", gws);
stage->attrs().set("lws", lws);
+ const auto localDataSizes = [&] {
+ auto sizes = std::map<std::string, int>{};
+ for (const auto& bind : kernel.bindings()) {
+ if (bind.type == CustomParamType::LocalData) {
+ const auto& source = bind.dimSource == CustomDimSource::Input ? inputs : outputs;
+ const auto& desc = source[bind.dimIdx]->desc();
+ const auto size = calcSizesFromParams(desc, { bind.bufferSizeRule }, layer->params);
+ sizes.emplace(bind.argName, size[0]);
+ }
+ }
+ return sizes;
+ }();
+
+ stage->attrs().set("localDataSizes", localDataSizes);
+
std::map<int, DimsOrder> inputOrders;
std::map<int, DimsOrder> outputOrders;
- std::map<std::string, CustomLayer::KernelParam> b2b;
- for (const auto& kp : customLayer->bindings()) {
+ std::map<std::string, CustomKernel::KernelParam> b2b;
+ for (const auto& kp : kernel.bindings()) {
b2b[kp.argName] = kp;
}
{ CustomDataFormat::FYX, DimsOrder::CHW }
};
- for (const auto& kp : customLayer->parameters()) {
+ for (const auto& kp : kernel.parameters()) {
const auto& parameter = b2b[kp];
if (parameter.type == CustomParamType::Input) {
stage->attrs().set("inputOrders", std::move(inputOrders));
stage->attrs().set("outputOrders", std::move(outputOrders));
- int buffer_size = customLayer->kernelBinary().length() + 1024;
+ int buffer_size = kernel.kernelBinary().length() + 1024;
model->addTempBuffer(
stage,
DataDesc({buffer_size}));
namespace vpu {
void FrontEnd::parseDSR(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const {
- VPU_THROW_UNLESS(inputs.size() == 2, "Error while parsing {} with type {}, got {} inputs, while {} were expected",
+ VPU_THROW_UNLESS(inputs.size() == 2, "Error while parsing {} of type {}, got {} inputs, while {} were expected",
layer->name, layer->type, inputs.size(), 2);
-
- VPU_THROW_UNLESS(outputs.size() == 1, "Error while parsing {} with type {}, got {} outputs, while {} were expected",
- layer->name, layer->type, outputs.size(), 1);
-
const auto& data = inputs[0];
const auto& shape = inputs[1];
+ VPU_THROW_UNLESS(outputs.size() == 1, "Parsing layer {} of type {} failed: got {} outputs, while {} were expected",
+ layer->name, layer->type, outputs.size(), 1);
const auto& dataOutput = outputs[0];
+ const auto dataProducerEdge = data->producerEdge();
+ VPU_THROW_UNLESS(dataProducerEdge != nullptr, "Parsing layer {} of type {} failed: input with index {} (of name {}) must have a producer",
+ layer->name, layer->type, 0, data->name());
+
VPU_THROW_UNLESS(shape->desc().numDims() == 1,
- "Error while parsing {} with type {}, the number of dimensions for the second input {} should be equal to 1 "
- "but got {} instead",
- layer->name, layer->type, shape->name(), shape->desc().numDims());
+ "Parsing layer {} of type {} failed: input with index {} (of name {}) must have rank equal to {}, actual is {}",
+ layer->name, layer->type, 0, shape->name(), 1, shape->desc().numDims());
VPU_THROW_UNLESS(shape->desc().totalDimSize() == data->desc().numDims(),
- "Error while parsing {} with type {}, the total number of elements for the second input {} should be equal to "
- "the number of dimensions for the first input {}, but got {} and {} respectively",
- layer->name, layer->type, shape->name(), data->name(), shape->desc().totalDimSize(), data->desc().numDims());
+ "Parsing layer {} of type {} failed: input with index {} (of name {}) must have the same total elements number as "
+ "input with index {} (of name {}), actual {} and {} respectively",
+ layer->name, layer->type, 0, shape->name(), 1, data->name(), shape->desc().totalDimSize(), data->desc().numDims());
- // Dynamic input shape is unsupported
- VPU_THROW_UNLESS(data->producer() != nullptr,
- "Parsing layer {} with type {} failed: DSR stages must have a producer, but actually it doesn't",
- layer->name, layer->type);
+ const auto shapeProducerEdge = shape->producerEdge();
+ VPU_THROW_UNLESS(shapeProducerEdge != nullptr, "Parsing layer {} of type {} failed: input with index {} (of name {}) must have a producer",
+ layer->name, layer->type, 1, shape->name());
- const auto dataOutputEdge = data->producerEdge();
- const auto shapeOutputEdge = shape->producerEdge();
+ model->replaceStageOutput(dataProducerEdge, dataOutput);
+ if (const auto& dataToShapeEdge = data->parentDataToShapeEdge()) {
+ model->replaceDataToShapeChild(dataToShapeEdge, dataOutput);
+ }
+ model->removeUnusedData(data);
if (dataOutput->usage() == DataUsage::Output) {
// Create the second output with shape in case of dynamic output
const auto& shapeOutput = model->addOutputData(dataOutput->name() + "@shape", shape->desc());
- model->replaceStageOutput(shapeOutputEdge, shapeOutput);
+ model->replaceStageOutput(shapeProducerEdge, shapeOutput);
+ model->connectDataWithShape(shapeOutput, dataOutput);
+
+ for (const auto& dataToShapeEdge : shape->childDataToShapeEdges()) {
+ model->replaceDataToShapeParent(dataToShapeEdge, shapeOutput);
+ }
model->removeUnusedData(shape);
} else {
model->connectDataWithShape(shape, dataOutput);
}
-
- model->replaceStageOutput(dataOutputEdge, dataOutput);
- model->removeUnusedData(data);
}
} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/frontend/frontend.hpp>
+
+#include <vpu/model/data_contents/ie_blob_content.hpp>
+
+#include <vector>
+#include <map>
+#include <unordered_set>
+#include <memory>
+#include <set>
+
+namespace vpu {
+
+namespace {
+
+class OutShapeOfReshapeStage final : public StageNode {
+private:
+ StagePtr cloneImpl() const override {
+ return std::make_shared<OutShapeOfReshapeStage>(*this);
+ }
+
+ void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+ }
+
+ void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+ }
+
+ void finalizeDataLayoutImpl() override {
+ }
+
+ void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+ }
+
+ void initialCheckImpl() const override {
+ assertInputsOutputsTypes(this,
+ {{DataType::S32}, {DataType::S32}},
+ {{DataType::S32}});
+ }
+
+ void serializeParamsImpl(BlobSerializer& serializer) const override {
+ auto specialZero = attrs().get<bool>("specialZero");
+
+ serializer.append(static_cast<int32_t>(specialZero));
+ }
+
+ void serializeDataImpl(BlobSerializer& serializer) const override {
+ input(0)->serializeBuffer(serializer);
+ input(1)->serializeBuffer(serializer);
+ output(0)->serializeBuffer(serializer);
+ }
+};
+
+} // namespace
+
+void FrontEnd::parseOutShapeOfReshape(
+ const Model& model,
+ const ie::CNNLayerPtr& layer,
+ const DataVector& inputs,
+ const DataVector& outputs) const {
+ VPU_THROW_UNLESS(inputs.size() == 2,
+ "OutShapeOfReshape stage with name %s must have only 2 inputs, "
+ "actually provided %d", layer->name, inputs.size());
+ VPU_THROW_UNLESS(outputs.size() == 1,
+ "OutShapeOfReshape stage with name %s must have only 1 output, "
+ "actually provided %d", layer->name, outputs.size());
+
+ auto inDataShape = inputs[0];
+ auto outShapeDescriptor = inputs[1];
+ auto outDataShape = outputs[0];
+
+ VPU_THROW_UNLESS(inDataShape->desc().numDims() == 1,
+ "OutShapeOfReshape stage with name %s must have 1D input data shape tensor, "
+ "actually provided %dD tensor", layer->name, inDataShape->desc().numDims());
+ VPU_THROW_UNLESS(outShapeDescriptor->desc().numDims() == 1,
+ "OutShapeOfReshape stage with name %s must have 1D output shape descriptor "
+ "tensor, actually provided %dD tensor",
+ layer->name, outShapeDescriptor->desc().numDims());
+ VPU_THROW_UNLESS(outDataShape->desc().numDims() == 1,
+ "OutShapeOfReshape stage with name %s must have 1D output data shape tensor, "
+ "actually provided %dD tensor", layer->name, outDataShape->desc().numDims());
+
+ VPU_THROW_UNLESS(outShapeDescriptor->desc().totalDimSize() == outDataShape->desc().totalDimSize(),
+ "OutShapeOfReshape stage with name %s must have output shape descriptor and "
+ "output data shape tensor with equal length, actually provided %d vs %d",
+ layer->name, outShapeDescriptor->desc().totalDimSize(),
+ outDataShape->desc().totalDimSize());
+
+
+ auto outShapeOfReshapeStage = model->addNewStage<OutShapeOfReshapeStage>(
+ layer->name,
+ StageType::OutShapeOfReshape,
+ layer,
+ inputs,
+ outputs);
+
+ auto specialZero = layer->GetParamAsInt("special_zero", 0);
+ outShapeOfReshapeStage->attrs().set<bool>("specialZero", specialZero);
+}
+
+} // namespace vpu
int outputWidth = output->desc().dim(Dim::W);
int outputHeight = output->desc().dim(Dim::H);
+ // kernelStrideY doesn't matter when kernelSizeY==InputSizeY, change it to try HW in 1D case
+ if (kernelSizeY == inputHeight + padTop + padBottom)
+ kernelStrideY = kernelStrideX;
+
bool tryHW = canTryHW(poolType,
inputWidth,
inputHeight,
//
#include <vpu/frontend/frontend.hpp>
+#include <vpu/model/data_desc.hpp>
#include <vpu/model/data_contents/ie_blob_content.hpp>
#include <algorithm>
#include <memory>
#include <set>
+#include <vector>
#include <string>
namespace vpu {
}
void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
- auto input0 = inputEdge(0)->input();
- auto input1 = inputEdge(1)->input();
- auto output = outputEdge(0)->output();
-
- auto in0Desc = input0->desc();
- auto in1Desc = input1->desc();
- auto outDesc = output->desc();
-
- auto in0Order = DimsOrder::fromNumDims(in0Desc.numDims());
- auto in1Order = DimsOrder::fromNumDims(in1Desc.numDims());
- auto outOrder = DimsOrder::fromNumDims(outDesc.numDims());
-
- orderInfo.setInput(inputEdge(0), in0Order);
- orderInfo.setInput(inputEdge(1), in1Order);
- orderInfo.setOutput(outputEdge(0), outOrder);
+ orderInfo.setInput(inputEdge(0), input(0)->desc().dimsOrder());
+ orderInfo.setInput(inputEdge(1), input(1)->desc().dimsOrder());
+ orderInfo.setOutput(outputEdge(0), output(0)->desc().dimsOrder());
}
void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
}
void finalizeDataLayoutImpl() override {
- auto input0 = inputEdge(0)->input();
- auto input1 = inputEdge(1)->input();
-
- auto in0Desc = input0->desc();
- auto in1Desc = input1->desc();
-
- IE_ASSERT(input1->usage() == DataUsage::Const);
+ auto reductionAxes = input(1);
+ auto in0Desc = input(0)->desc();
+ auto in1Desc = reductionAxes->desc();
+ VPU_THROW_UNLESS(reductionAxes->usage() == DataUsage::Const,
+ "Stage {} of type {} expects input with index {} ({}) to be {}, but it is {}",
+ name(), type(), 1, reductionAxes->name(), DataUsage::Const, reductionAxes->usage());
size_t ndims = in0Desc.numDims();
- IE_ASSERT(in1Desc.numDims() == 1);
+ VPU_THROW_UNLESS(in1Desc.numDims() == 1,
+ "Stage {} of type {} expects input with index {} ({}) to have dimensions number is {}, but it is {}",
+ name(), type(), 1, reductionAxes->name(), 1, in1Desc.numDims());
size_t indicesSize = in1Desc.totalDimSize();
- IE_ASSERT(indicesSize <= ndims);
+ VPU_THROW_UNLESS(indicesSize <= ndims,
+ "Stage {} of type {} expects input with index {} ({}) to have total size not greater than dimensions ",
+ "number of input with index {} ({}), but it is {} > {}",
+ name(), type(), 1, reductionAxes->name(), 0, input(0)->name(), indicesSize, ndims);
- const auto oldIndices = input1->content()->get<int32_t>();
+ const auto oldIndices = reductionAxes->content()->get<int32_t>();
auto newIndicesBlob = ie::make_shared_blob<int32_t>(InferenceEngine::TensorDesc(
ie::Precision::I32,
auto newIndices = newIndicesBlob->buffer().as<int32_t*>();
- const auto defDimsOrder = DimsOrder::fromNumDims(ndims);
- const auto defPerm = defDimsOrder.toPermutation();
+ const auto defPerm = DimsOrder::fromNumDims(ndims).toPermutation();
+ const auto dimsOrder = in0Desc.dimsOrder();
for (size_t i = 0; i < indicesSize; ++i) {
auto irIndex = oldIndices[i];
if (irIndex < 0) {
// handle negative indices
- irIndex = ndims - irIndex;
+ irIndex = ndims - std::abs(irIndex);
}
- IE_ASSERT(irIndex < ndims);
-
- const auto irRevIndex = ndims - 1 - irIndex;
+ VPU_THROW_UNLESS(irIndex < ndims,
+ "Stage {} of type {} expects input with index {} ({}) include values less than ",
+ "dimensions number of input with index {} ({}), but it is {} >= {}",
+ name(), type(), 1, reductionAxes->name(), 0, input(0)->name(), irIndex, ndims);
- const auto irDim = defPerm[irRevIndex];
-
- const auto vpuDimInd = in0Desc.dimsOrder().dimInd(irDim);
- newIndices[i] = vpuDimInd;
+ const auto reducedDim = defPerm[ndims - 1 - irIndex];
+ newIndices[i] = dimsOrder.dimInd(reducedDim);
}
-
std::sort(newIndices, newIndices + indicesSize);
auto newList = model()->duplicateData(
- input1,
+ reductionAxes,
"",
DataDesc(),
ieBlobContent(newIndicesBlob, DataType::S32));
}
void initialCheckImpl() const override {
- IE_ASSERT(input(0)->desc().type() == output(0)->desc().type());
+ VPU_THROW_UNLESS(input(0)->desc().type() == output(0)->desc().type(),
+ "Stage {} of type {} expects that data types of input with index {} ({}) ",
+ "and output with index {} ({}) are the same, but it is {} and {}",
+ name(), type(), 0, input(0)->name(), 0, output(0)->name(), input(0)->desc().type(), output(0)->desc().type());
assertInputsOutputsTypes(this,
{{DataType::FP16, DataType::S32}, {DataType::S32}},
{{DataType::FP16, DataType::S32}});
void FrontEnd::parseReduce(const Model& model, const ie::CNNLayerPtr& _layer, const DataVector& inputs, const DataVector& outputs) const {
auto layer = std::dynamic_pointer_cast<ie::ReduceLayer>(_layer);
- IE_ASSERT(layer != nullptr);
-
- IE_ASSERT(inputs.size() == 2);
- IE_ASSERT(outputs.size() == 1);
+ VPU_THROW_UNLESS(layer != nullptr,
+ "Layer {} of type {} is nullptr",
+ layer->name, layer->type);
+ VPU_THROW_UNLESS(inputs.size() == 2,
+ "Layer {} of type {} expects {} inputs, but provided {}",
+ layer->name, layer->type, 2, inputs.size());
+ VPU_THROW_UNLESS(outputs.size() == 1,
+ "Layer {} of type {} expects {} output, but provided {}",
+ layer->name, layer->type, 1, outputs.size());
auto stageType = StageType::None;
if (layer->type == "ReduceAnd") {
realOutputs = {outputValues};
}
- const bool isArgMaxPossible = outputsMode != TopKOutputs::All && mode == TopKMode::Max
- && ((sort == TopKSort::Value && outputsMode == TopKOutputs::ValueOnly)
- || (sort == TopKSort::Index && outputsMode == TopKOutputs::IndexOnly));
-
auto stage = model->addNewStage<TopKStage>(layer->name,
- isArgMaxPossible ? StageType::ArgMax : StageType::TopK,
+ StageType::TopK,
layer, inputs, realOutputs);
stage->attrs().set<Dim>("axis", axis);
#include <vpu/parsed_config.hpp>
#include <vpu/utils/profiling.hpp>
#include <vpu/utils/error.hpp>
+#include <transformations/common_optimizations/common_optimizations.hpp>
#include "vpu/ngraph/transformations/dynamic_to_static_shape.hpp"
#include "generic_ie.hpp"
using namespace vpu::MyriadPlugin;
ExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl(
- const ICore* /*core*/,
const ICNNNetwork& network,
const std::map<std::string, std::string>& config) {
VPU_PROFILE(LoadExeNetworkImpl);
auto clonedNetwork = cloneNetwork(network);
if (auto function = clonedNetwork->getFunction()) {
ngraph::op::GenericIE::DisableReshape noReshape(function);
- vpu::DynamicToStaticShape().transform(*function);
+ ngraph::pass::CommonOptimizations().run_on_function(function);
+ vpu::DynamicToStaticShape().transform(function);
}
return std::make_shared<ExecutableNetwork>(*clonedNetwork, _devicePool, parsedConfigCopy);
void SetConfig(const std::map<std::string, std::string>& config) override;
ie::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(
- const ie::ICore* core,
const ie::ICNNNetwork& network,
const std::map<std::string, std::string>& config) override;
EXCLUDED_SOURCE_DIRS
${CMAKE_CURRENT_SOURCE_DIR}/extension_lib
LINK_LIBRARIES
+ gmock
funcTestUtils
ngraphFunctions
inference_engine_transformations
ADD_CPPLINT
- LABELS
- INFERENCE_ENGINE
DEPENDENCIES
extension_tests
+ mock_engine
+ LABELS
+ IE
)
include(CMakeParseArguments)
set(content "#include <${header_file}>\n${content}")
endforeach()
set(source_file "${CMAKE_CURRENT_BINARY_DIR}/modern_flags_${IE_TEST_TEST_SUFFIX}.cpp")
+ file(REMOVE ${source_file})
file(GENERATE OUTPUT ${source_file} CONTENT ${content})
set(target_name ieFuncTestsCompilation${IE_TEST_TEST_SUFFIX})
using namespace std;
using namespace InferenceEngine::details;
-class CaselessTests : public ::testing::Test {
- protected:
- virtual void TearDown() {
- }
-
- virtual void SetUp() {
- }
-
- public:
-
-};
+using CaselessTests = ::testing::Test;
TEST_F(CaselessTests, emptyAreEqual) {
ASSERT_TRUE(InferenceEngine::details::equal("", ""));
}
TEST_F(CaselessTests, canFindCaslessInUnordered) {
-
caseless_unordered_map <string, int> storage = {
{"Abc", 1},
{"bC", 2},
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <ie_core.hpp>
+#include <details/ie_exception.hpp>
+#include <ie_plugin_config.hpp>
+#include <ie_extension.h>
+
+#include <file_utils.h>
+#include <ngraph_functions/subgraph_builders.hpp>
+#include <functional_test_utils/test_model/test_model.hpp>
+#include <common_test_utils/file_utils.hpp>
+#include <common_test_utils/test_assertions.hpp>
+
+#include <gtest/gtest.h>
+#include <thread>
+#include <atomic>
+#include <mutex>
+#include <chrono>
+#include <fstream>
+
+class CoreThreadingTests : public ::testing::Test {
+public:
+ void SetUp() override {
+ }
+
+ void runParallel(std::function<void(void)> func,
+ const unsigned int iterations = 100,
+ const unsigned int threadsNum = 8) {
+ std::vector<std::thread> threads(threadsNum);
+
+ for (auto & thread : threads) {
+ thread = std::thread([&](){
+ for (unsigned int i = 0; i < iterations; ++i) {
+ func();
+ }
+ });
+ }
+
+ for (auto & thread : threads) {
+ if (thread.joinable())
+ thread.join();
+ }
+ }
+
+ void safeAddExtension(InferenceEngine::Core & ie) {
+ try {
+ auto extension = InferenceEngine::make_so_pointer<InferenceEngine::IExtension>(
+ FileUtils::makeSharedLibraryName<char>({},
+ std::string("extension_tests") + IE_BUILD_POSTFIX));
+ ie.AddExtension(extension);
+ } catch (const InferenceEngine::details::InferenceEngineException & ex) {
+ ASSERT_STR_CONTAINS(ex.what(), "name: experimental. Opset");
+ }
+ }
+};
+
+// tested function: SetConfig
+TEST_F(CoreThreadingTests, SetConfigPluginDoesNotExist) {
+ InferenceEngine::Core ie;
+ std::map<std::string, std::string> localConfig = {
+ { CONFIG_KEY(PERF_COUNT), InferenceEngine::PluginConfigParams::YES }
+ };
+
+ runParallel([&] () {
+ ie.SetConfig(localConfig);
+ }, 10000);
+}
+
+// tested function: RegisterPlugin
+TEST_F(CoreThreadingTests, RegisterPlugin) {
+ InferenceEngine::Core ie;
+ std::atomic<int> index{0};
+ runParallel([&] () {
+ const std::string deviceName = std::to_string(index++);
+ ie.RegisterPlugin(std::string("mock_engine") + IE_BUILD_POSTFIX, deviceName);
+ ie.GetVersions(deviceName);
+ ie.UnregisterPlugin(deviceName);
+ }, 4000);
+}
+
+// tested function: RegisterPlugins
+TEST_F(CoreThreadingTests, RegisterPlugins) {
+ InferenceEngine::Core ie;
+ std::atomic<unsigned int> index{0};
+
+ auto getPluginXml = [&] () -> std::tuple<std::string, std::string> {
+ std::string indexStr = std::to_string(index++);
+ std::string pluginsXML = InferenceEngine::getIELibraryPath() +
+ FileUtils::FileSeparator +
+ "test_plugins" + indexStr + ".xml";
+ std::ofstream file(pluginsXML);
+
+ file << "<ie><plugins><plugin location=\"";
+ file << FileUtils::FileTraits<char>::SharedLibraryPrefix();
+ file << "mock_engine";
+ file << IE_BUILD_POSTFIX;
+ file << FileUtils::DotSymbol<char>::value;
+ file << FileUtils::FileTraits<char>::SharedLibraryExt();
+ file << "\" name=\"";
+ file << indexStr;
+ file << "\"></plugin></plugins></ie>";
+ file.flush();
+ file.close();
+
+ return std::tie(pluginsXML, indexStr);
+ };
+
+ runParallel([&] () {
+ std::string fileName, deviceName;
+ std:tie(fileName, deviceName) = getPluginXml();
+ ie.RegisterPlugins(fileName);
+ ie.GetVersions(deviceName);
+ ASSERT_EQ(0, std::remove(fileName.c_str()));
+ }, 1000);
+}
+
+// tested function: GetAvailableDevices, UnregisterPlugin
+// TODO: some plugins initialization (e.g. GNA) failed during such stress-test scenario
+TEST_F(CoreThreadingTests, DISABLED_GetAvailableDevices) {
+ InferenceEngine::Core ie;
+ runParallel([&] () {
+ std::vector<std::string> devices = ie.GetAvailableDevices();
+
+ // unregister all the devices
+ for (auto && deviceName : devices) {
+ try {
+ ie.UnregisterPlugin(deviceName);
+ } catch (const InferenceEngine::details::InferenceEngineException & ex) {
+ // if several threads unload plugin at once, the first thread does this
+ // while all others will throw an exception that plugin is not registered
+ ASSERT_STR_CONTAINS(ex.what(), "name is not registered in the");
+ }
+ }
+ }, 30);
+}
+
+// tested function: ReadNetwork, AddExtension
+TEST_F(CoreThreadingTests, ReadNetwork) {
+ InferenceEngine::Core ie;
+ auto model = FuncTestUtils::TestModel::convReluNormPoolFcModelFP32;
+ auto network = ie.ReadNetwork(model.model_xml_str, model.weights_blob);
+
+ runParallel([&] () {
+ safeAddExtension(ie);
+ (void)ie.ReadNetwork(model.model_xml_str, model.weights_blob);
+ }, 100, 12);
+}
#include "debug.h"
#include <string>
-class DebugTests : public ::testing::Test {
-protected:
- virtual void TearDown() {
- }
-
- virtual void SetUp() {
- }
-
-public:
-
-};
+using DebugTests = ::testing::Test;
TEST_F(DebugTests, tolowerWorksWithEmptyString) {
std::string str = "";
class ExtensionLibTests : public CommonTestUtils::TestsCommon {
public:
std::string getExtensionPath() {
-#ifdef _WIN32
-#ifdef __MINGW32__
- const char pre[] = "lib";
-#else
- const char pre[] = "";
-#endif
-#ifdef NDEBUG
- const char ext[] = ".dll";
-#else
- const char ext[] = "d.dll";
-#endif
- const char FileSeparator[] = "\\";
-
-#else
- const char FileSeparator[] = "/";
-#if defined __APPLE__
-#ifdef NDEBUG
- const char ext[] = ".dylib";
-#else
- const char ext[] = "d.dylib";
-#endif
- const char pre[] = "lib";
-#else
- const char pre[] = "lib";
- const char ext[] = ".so";
-#endif
-#endif
- return getIELibraryPath() + FileSeparator + pre + "extension_tests" + ext;
+ return FileUtils::makeSharedLibraryName<char>({},
+ std::string("extension_tests") + IE_BUILD_POSTFIX);
}
};
#include <string>
#include "ngraph_reader_tests.hpp"
+#include <ngraph/opsets/opset.hpp>
+#include <ngraph/ngraph.hpp>
+#include <ie_iextension.h>
+
+class FakeAbs : public ngraph::op::Op {
+public:
+ static constexpr ngraph::NodeTypeInfo type_info{"Abs", 100500};
+ const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; }
+
+ FakeAbs() = default;
+ FakeAbs(const ngraph::Output<ngraph::Node>& arg): ngraph::op::Op({arg}) {
+ constructor_validate_and_infer_types();
+ }
+ void validate_and_infer_types() override {
+ set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
+ }
+ std::shared_ptr<ngraph::Node> copy_with_new_args(const ngraph::NodeVector& new_args) const override {
+ return std::make_shared<FakeAbs>(new_args.at(0));
+ }
+ bool visit_attributes(ngraph::AttributeVisitor& visitor) override {
+ return true;
+ }
+};
+constexpr ngraph::NodeTypeInfo FakeAbs::type_info;
+
+class AbsFakeExtension: public InferenceEngine::IExtension {
+public:
+ void GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept override {}
+ void Release() noexcept override { delete this; }
+ void Unload() noexcept override {}
+
+ std::map<std::string, ngraph::OpSet> getOpSets() override{
+ std::map<std::string, ngraph::OpSet> opsets;
+ ngraph::OpSet opset;
+ opset.insert<FakeAbs>();
+ opsets["experimental"] = opset;
+ return opsets;
+ }
+};
+
+TEST_F(NGraphReaderTests, ReadAbsFromCustomOpsetNetwork) {
+ std::string model = R"V0G0N(
+<net name="Network" version="10">
+ <layers>
+ <layer name="in1" type="Parameter" id="0" version="opset1">
+ <data element_type="f32" shape="1,3,22,22"/>
+ <output>
+ <port id="0" precision="FP32">
+ <dim>1</dim>
+ <dim>3</dim>
+ <dim>22</dim>
+ <dim>22</dim>
+ </port>
+ </output>
+ </layer>
+ <layer name="Abs" id="1" type="Abs" version="experimental">
+ <input>
+ <port id="1" precision="FP32">
+ <dim>1</dim>
+ <dim>3</dim>
+ <dim>22</dim>
+ <dim>22</dim>
+ </port>
+ </input>
+ <output>
+ <port id="2" precision="FP32">
+ <dim>1</dim>
+ <dim>3</dim>
+ <dim>22</dim>
+ <dim>22</dim>
+ </port>
+ </output>
+ </layer>
+ <layer name="output" type="Result" id="2" version="opset1">
+ <input>
+ <port id="0" precision="FP32">
+ <dim>1</dim>
+ <dim>3</dim>
+ <dim>22</dim>
+ <dim>22</dim>
+ </port>
+ </input>
+ </layer>
+ </layers>
+ <edges>
+ <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
+ <edge from-layer="1" from-port="2" to-layer="2" to-port="0"/>
+ </edges>
+</net>
+)V0G0N";
+
+ Core ie;
+ ie.AddExtension(std::make_shared<AbsFakeExtension>());
+ Blob::Ptr weights;
+
+ auto network = ie.ReadNetwork(model, weights);
+ auto nGraph = network.getFunction();
+ bool genericNodeExists = false;
+ const std::string type = "Abs";
+ for (auto op : nGraph->get_ops()) {
+ if (type == op->get_type_info().name && 100500 == op->get_type_info().version)
+ genericNodeExists = true;
+ }
+ ASSERT_TRUE(genericNodeExists);
+}
TEST_F(NGraphReaderTests, ReadAbsNetwork) {
std::string model = R"V0G0N(
</output>
</layer>
<layer id="3" name="add" precision="FP32" type="ReLU">
- <data originalLayersNames="relu" />
+ <data originalLayersNames="relu"/>
<input>
<port id="0">
<dim>1</dim>
allocator = details::make_pre_allocator(&*mybuf.begin(), mybuf.size());
}
std::shared_ptr<IAllocator> allocator;
-
};
TEST_F(PreallocatorTests, canAccessPreAllocatedMemory) {
- void * handle = allocator->alloc(3);
- float * ptr = (float *)allocator->lock(handle);
+ void * handle = allocator->alloc(3);
+ float * ptr = reinterpret_cast<float*>(allocator->lock(handle));
- mybuf = {1.1f,2.2f,3.3f};
+ mybuf = { 1.1f, 2.2f, 3.3f };
ASSERT_EQ(ptr, &*mybuf.begin());
ASSERT_EQ(ptr[0], 1.1f);
TEST_F(PreallocatorTests, canNotLockWrongHandle) {
void * handle = allocator->alloc(3);
- EXPECT_EQ(nullptr, allocator->lock(1 + (int*)handle));
+ EXPECT_EQ(nullptr, allocator->lock(1 + reinterpret_cast<int*>(handle)));
}
using namespace std;
-class PreProcessTests : public ::testing::Test {
-protected:
- virtual void TearDown() {
- }
-
- virtual void SetUp() {
- }
-
-public:
-
-};
+using PreProcessTests = ::testing::Test;
TEST_F(PreProcessTests, throwsOnSettingNullMeanImage) {
InferenceEngine::PreProcessInfo info;
using namespace std;
using namespace InferenceEngine;
-class ResponseBufferTests: public ::testing::Test {
-protected:
- virtual void TearDown() {
- }
-
- virtual void SetUp() {
- }
-
-public:
-
-};
-
+using ResponseBufferTests = ::testing::Test;
TEST_F(ResponseBufferTests, canCreateResponseMessage) {
ResponseDesc desc;
// SPDX-License-Identifier: Apache-2.0
//
+#include <gtest/gtest.h>
+
#include <ie_plugin_ptr.hpp>
-#include "tests_common.hpp"
+#include <file_utils.h>
#include "details/ie_so_loader.h"
-#include "inference_engine.hpp"
using namespace std;
using namespace InferenceEngine;
IE_SUPPRESS_DEPRECATED_START
-class SharedObjectLoaderTests: public TestsCommon {
+class SharedObjectLoaderTests: public ::testing::Test {
protected:
+ std::string get_mock_engine_name() {
+ return FileUtils::makeSharedLibraryName<char>(getIELibraryPath(),
+ std::string("mock_engine") + IE_BUILD_POSTFIX);
+ }
+
void loadDll(const string &libraryName) {
sharedObjectLoader.reset(new details::SharedObjectLoader(libraryName.c_str()));
}
unique_ptr<SharedObjectLoader> sharedObjectLoader;
-
+
template <class T>
std::function<T> make_std_function(const std::string& functionName) {
- std::function <T> ptr (reinterpret_cast<T*>(sharedObjectLoader->get_symbol(functionName.c_str())));
+ std::function<T> ptr(reinterpret_cast<T*>(sharedObjectLoader->get_symbol(functionName.c_str())));
return ptr;
}
};
TEST_F(SharedObjectLoaderTests, canFindExistedMethod) {
loadDll(get_mock_engine_name());
-
+
auto factory = make_std_function<StatusCode(IInferencePlugin*&, ResponseDesc*)>("CreatePluginEngine");
EXPECT_NE(nullptr, factory);
}
TEST_F(SharedObjectLoaderTests, throwIfMethodNofFoundInLibrary) {
loadDll(get_mock_engine_name());
-
+
EXPECT_THROW(make_std_function<IInferencePlugin*()>("wrong_function"), InferenceEngine::details::InferenceEngineException);
}
#include <gmock/gmock.h>
#include <gmock/gmock-spec-builders.h>
+#include <file_utils.h>
+#include <ie_plugin_ptr.hpp>
+
#include <memory>
-#include <tests_utils.hpp>
+#include <common_test_utils/test_assertions.hpp>
#include <details/ie_so_pointer.hpp>
#include <details/ie_irelease.hpp>
+using namespace InferenceEngine;
using namespace InferenceEngine::details;
using namespace ::testing;
using ::testing::InSequence;
}
};
-template<class T=PointedObjHelper, class L = SharedObjectLoaderHelper>
+template <class T = PointedObjHelper, class L = SharedObjectLoaderHelper>
class SoPointerHelper : public SOPointer<T, L> {
public:
SoPointerHelper(std::shared_ptr<L>&& loader, std::shared_ptr<T>&& object)
ASSERT_STR_DOES_NOT_CONTAIN(e.what(), "from CWD:");
}
}
+
+using SymbolLoaderTests = ::testing::Test;
+
+TEST_F(SymbolLoaderTests, throwCreateNullPtr) {
+ ASSERT_THROW(SymbolLoader<SharedObjectLoader>(nullptr), InferenceEngineException);
+}
+
+TEST_F(SymbolLoaderTests, instantiateSymbol) {
+ std::string name = FileUtils::makeSharedLibraryName<char>(getIELibraryPath(),
+ std::string("mock_engine") + IE_BUILD_POSTFIX);
+ std::shared_ptr<SharedObjectLoader> sharedLoader(new SharedObjectLoader(name.c_str()));
+ SymbolLoader<SharedObjectLoader> loader(sharedLoader);
+ IE_SUPPRESS_DEPRECATED_START
+ ASSERT_NE(nullptr, loader.instantiateSymbol<IInferencePlugin>(SOCreatorTrait<IInferencePlugin>::name));
+ IE_SUPPRESS_DEPRECATED_END
+}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <memory>
+#include <queue>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_broadcast3.hpp>
+#include <transformations/init_node_info.hpp>
+#include <transformations/utils/utils.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+
+// Broadcast-3 is converted directly to Broadcast-1 for modes NUMPY, NONE and PDPD
+TEST(TransformationTests, ConvertBroadcast3WithNumpyModeToBroadcast1) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{3, 1, 2});
+ auto target_shape = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 2});
+ auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(input1, target_shape, ngraph::op::BroadcastType::NUMPY);
+ broadcast->set_friendly_name("broadcast");
+
+ f = std::make_shared<ngraph::Function>(ngraph::NodeVector{broadcast}, ngraph::ParameterVector{input1});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertBroadcast3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{3, 1, 2});
+ auto target_shape = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 2});
+ auto broadcast = std::make_shared<ngraph::opset1::Broadcast>(input1, target_shape, ngraph::op::AutoBroadcastType::NUMPY);
+ broadcast->set_friendly_name("broadcast");
+
+ f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{broadcast}, ngraph::ParameterVector{input1});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto broadcast_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ auto crop_node = broadcast_node->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(broadcast_node->get_friendly_name() == "broadcast") << "Transformation ConvertBroadcast3 should keep output names.\n";
+}
+
+TEST(TransformationTests, ConvertBroadcast3WithPDPDModeToBroadcast1) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{3, 1, 2});
+ auto target_shape = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 2});
+ auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(input1, target_shape, ngraph::op::BroadcastType::PDPD);
+ broadcast->set_friendly_name("broadcast");
+
+ f = std::make_shared<ngraph::Function>(ngraph::NodeVector{broadcast}, ngraph::ParameterVector{input1});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertBroadcast3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{3, 1, 2});
+ auto target_shape = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 2});
+ auto broadcast = std::make_shared<ngraph::opset1::Broadcast>(input1, target_shape, ngraph::op::AutoBroadcastType::PDPD);
+ broadcast->set_friendly_name("broadcast");
+
+ f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{broadcast}, ngraph::ParameterVector{input1});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto broadcast_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ auto crop_node = broadcast_node->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(broadcast_node->get_friendly_name() == "broadcast") << "Transformation ConvertBroadcast3 should keep output names.\n";
+}
+
+TEST(TransformationTests, ConvertBroadcast3WithExplicitModeToBroadcast1) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{3, 5, 2});
+ auto brodcast_axis = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{0, 1, 2});
+ auto target_shape = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 2});
+ auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(input1, target_shape, brodcast_axis, ngraph::op::BroadcastType::EXPLICIT);
+ broadcast->set_friendly_name("broadcast");
+
+ f = std::make_shared<ngraph::Function>(ngraph::NodeVector{broadcast}, ngraph::ParameterVector{input1});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertBroadcast3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{3, 5, 2});
+ auto brodcast_axis = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{0, 1, 2});
+ auto target_shape = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 2});
+ auto broadcast = std::make_shared<ngraph::opset1::Broadcast>(input1, target_shape, brodcast_axis, ngraph::op::AutoBroadcastType::EXPLICIT);
+
+ f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{broadcast}, ngraph::ParameterVector{input1});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto broadcast_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ auto crop_node = broadcast_node->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(broadcast_node->get_friendly_name() == "broadcast") << "Transformation ConvertBroadcast3 should keep output names.\n";
+}
+
+// Broadcast-3 with mode BIDIRECTIONAL is converted to a Multiply by a broadcasted constant of ones of the corresponding type
+TEST(TransformationTests, ConvertBroadcast3WithBidirectionalModeToBroadcast1) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 1, 2});
+ auto target_shape = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 1});
+ auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(input1, target_shape, ngraph::op::BroadcastType::BIDIRECTIONAL);
+ broadcast->set_friendly_name("broadcast");
+
+ f = std::make_shared<ngraph::Function>(ngraph::NodeVector{broadcast}, ngraph::ParameterVector{input1});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertBroadcast3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 1, 2});
+ auto target_shape = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 1});
+ auto constant_one = std::make_shared<ngraph::opset1::Constant>(input->get_output_element_type(0), ngraph::Shape({1}), std::vector<int>{1});
+ auto broadcast_ones = std::make_shared<ngraph::opset1::Broadcast>(constant_one, target_shape, ngraph::op::AutoBroadcastType::NUMPY);
+ auto multiply = std::make_shared<ngraph::opset1::Multiply>(input, broadcast_ones);
+ multiply->set_friendly_name("broadcast");
+
+ f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{multiply}, ngraph::ParameterVector{input});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto result_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ auto crop_node = result_node->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(result_node->get_friendly_name() == "broadcast") << "Transformation ConvertBroadcast3 should keep output names.\n";
+}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <memory>
+#include <queue>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset2.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_nms3.hpp>
+#include <transformations/init_node_info.hpp>
+#include <transformations/utils/utils.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+using namespace ngraph;
+
+TEST(TransformationTests, ConvertNMS3I32Output) {
+ std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
+ {
+ auto boxes = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1000, 4});
+ auto scores = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1, 1000});
+ auto max_output_boxes_per_class = opset3::Constant::create(element::i64, Shape{}, {10});
+ auto iou_threshold = opset3::Constant::create(element::f32, Shape{}, {0.75});
+ auto score_threshold = opset3::Constant::create(element::f32, Shape{}, {0.7});
+ auto nms = std::make_shared<opset3::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
+ iou_threshold, score_threshold, opset3::NonMaxSuppression::BoxEncodingType::CORNER, true, element::i32);
+ nms->set_friendly_name("nms");
+
+ f = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
+
+ pass::InitNodeInfo().run_on_function(f);
+ pass::ConvertNMS3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto boxes = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1000, 4});
+ auto scores = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1, 1000});
+ auto max_output_boxes_per_class = opset3::Constant::create(element::i64, Shape{}, {10});
+ auto iou_threshold = opset3::Constant::create(element::f32, Shape{}, {0.75});
+ auto score_threshold = opset3::Constant::create(element::f32, Shape{}, {0.7});
+ auto nms = std::make_shared<opset2::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
+ iou_threshold, score_threshold, opset1::NonMaxSuppression::BoxEncodingType::CORNER, true);
+ nms->set_friendly_name("nms");
+
+ f_ref = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto nms_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+    ASSERT_TRUE(nms_node->get_friendly_name() == "nms") << "Transformation ConvertNMS3 should keep output names.\n";
+}
+
+TEST(TransformationTests, ConvertNMS3I64Output) {
+ std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
+ {
+ auto boxes = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1000, 4});
+ auto scores = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1, 1000});
+ auto max_output_boxes_per_class = opset3::Constant::create(element::i64, Shape{}, {10});
+ auto iou_threshold = opset3::Constant::create(element::f32, Shape{}, {0.75});
+ auto score_threshold = opset3::Constant::create(element::f32, Shape{}, {0.7});
+ auto nms = std::make_shared<opset3::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
+ iou_threshold, score_threshold, opset3::NonMaxSuppression::BoxEncodingType::CORNER, true, element::i64);
+ nms->set_friendly_name("nms");
+
+ f = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
+
+ pass::InitNodeInfo().run_on_function(f);
+ pass::ConvertNMS3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto boxes = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1000, 4});
+ auto scores = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1, 1000});
+ auto max_output_boxes_per_class = opset3::Constant::create(element::i64, Shape{}, {10});
+ auto iou_threshold = opset3::Constant::create(element::f32, Shape{}, {0.75});
+ auto score_threshold = opset3::Constant::create(element::f32, Shape{}, {0.7});
+ auto nms = std::make_shared<opset2::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
+ iou_threshold, score_threshold, opset2::NonMaxSuppression::BoxEncodingType::CORNER, true);
+ auto convert = std::make_shared<ngraph::opset2::Convert>(nms, element::i64);
+ convert->set_friendly_name("nms");
+
+ f_ref = std::make_shared<Function>(NodeVector{convert}, ParameterVector{boxes, scores});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto nms_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+    ASSERT_TRUE(nms_node->get_friendly_name() == "nms") << "Transformation ConvertNMS3 should keep output names.\n";
+}
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include "common_test_utils/test_common.hpp"
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <memory>
+#include <queue>
+#include <map>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/pass/constant_folding.hpp>
+#include <transformations/convert_scatter_elements_to_scatter.hpp>
+#include <transformations/utils/utils.hpp>
+#include <transformations/init_node_info.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+
+std::shared_ptr<ngraph::Function> get_initial_function(const ngraph::PartialShape & data_shape,
+ const ngraph::PartialShape & indexes_shape,
+ const ngraph::PartialShape & updates_shape,
+ const ngraph::PartialShape & broadcast_shape,
+ const int64_t & axis) {
+ auto data = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, data_shape);
+ auto indexes = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, indexes_shape);
+ auto updates = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, updates_shape);
+ auto axis_const = ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {axis});
+
+ uint64_t broadcast_len = broadcast_shape.rank().get_length();
+ auto broadcast_shape_param = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{broadcast_len});
+ auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(indexes, broadcast_shape_param);
+
+ auto scatter = std::make_shared<ngraph::opset3::ScatterElementsUpdate>(data, broadcast, updates, axis_const);
+
+ return std::make_shared<ngraph::Function>(ngraph::NodeVector{scatter}, ngraph::ParameterVector{data, indexes, updates, broadcast_shape_param});
+}
+
+std::shared_ptr<ngraph::Function> get_reference_function(const ngraph::PartialShape & data_shape,
+ const ngraph::PartialShape & indexes_shape,
+ const ngraph::PartialShape & updates_shape,
+ const int64_t & axis,
+ const ngraph::Shape & reshape_shape = {},
+ const std::vector<int64_t> & squeeze_indices = {}) {
+ auto data = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, data_shape);
+ auto indexes = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, indexes_shape);
+ auto updates = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, updates_shape);
+ auto axis_const = ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {axis});
+
+ ngraph::Output<ngraph::Node> index_out = indexes->output(0);
+ if (!reshape_shape.empty()) {
+ index_out = std::make_shared<ngraph::opset3::Reshape>(indexes,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {reshape_shape.size()}, reshape_shape), false);
+ }
+
+ if (!squeeze_indices.empty()) {
+ index_out = std::make_shared<ngraph::opset3::Squeeze>(indexes,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {squeeze_indices.size()}, squeeze_indices));
+ }
+
+ auto scatter = std::make_shared<ngraph::opset3::ScatterUpdate>(data, index_out, updates, axis_const);
+
+ return std::make_shared<ngraph::Function>(ngraph::NodeVector{scatter}, ngraph::ParameterVector{data, indexes, updates});
+}
+
+void test(std::shared_ptr<ngraph::Function> f, std::shared_ptr<ngraph::Function> f_ref) {
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertScatterElementsToScatter().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ ngraph::pass::ConstantFolding().run_on_function(f);
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+}
+
+void test(std::shared_ptr<ngraph::Function> f) {
+ test(f, f);
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestAxis0) {
+ test(get_initial_function({1000, 256, 7, 7}, {1000, 1, 1, 1}, {1000, 256, 7, 7}, {1000, 256, 7, 7}, 0),
+ get_reference_function({1000, 256, 7, 7}, {1000, 1, 1, 1}, {1000, 256, 7, 7}, 0, {1000}));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestAxis1) {
+ test(get_initial_function({1000, 256, 7, 7}, {256, 1, 1}, {1000, 256, 7, 7}, {1000, 256, 7, 7}, 1),
+ get_reference_function({1000, 256, 7, 7}, {256, 1, 1}, {1000, 256, 7, 7}, 1, {256}));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestNoReshape) {
+ test(get_initial_function({1000, 256, 7, 7}, {1}, {1000, 1, 7, 7}, {1000, 1, 7, 7}, 1),
+ get_reference_function({1000, 256, 7, 7}, {1}, {1000, 1, 7, 7}, 1));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestNoReshapeNegAxis) {
+ test(get_initial_function({1000, 256, 7, 7}, {1}, {1000, 1, 7, 7}, {1000, 1, 7, 7}, -3),
+ get_reference_function({1000, 256, 7, 7}, {1}, {1000, 1, 7, 7}, -3));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestNegative) {
+ test(get_initial_function({1000, 256, 7, 7}, {1000, 256, 1, 1}, {1000, 256, 7, 7}, {1000, 256, 7, 7}, 0));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestAxis0Dyn) {
+ test(get_initial_function({DYN, 256, 7, 7}, {DYN, 1, 1, 1}, {DYN, 256, 7, 7}, {DYN, 256, 7, 7}, 0),
+ get_reference_function({DYN, 256, 7, 7}, {DYN, 1, 1, 1}, {DYN, 256, 7, 7}, 0, {}, {1, 2, 3}));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestAxis1Dyn) {
+ test(get_initial_function({1000, DYN, 7, 7}, {DYN, 1, 1}, {1000, DYN, 7, 7}, {1000, DYN, 7, 7}, 1),
+ get_reference_function({1000, DYN, 7, 7}, {DYN, 1, 1}, {1000, DYN, 7, 7}, 1, {}, {1, 2}));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestAxis1NoSqueezeDyn) {
+ test(get_initial_function({1000, DYN, 7, 7}, {DYN}, {1000, 256, 7, 7}, {1000, DYN, 7, 7}, 1),
+ get_reference_function({1000, DYN, 7, 7}, {DYN}, {1000, 256, 7, 7}, 1));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestAxis0Neg1Dyn) {
+ test(get_initial_function({DYN, 256, 7, 7}, {DYN, DYN, 1, 1}, {DYN, 256, 7, 7}, {DYN, 256, 7, 7}, 0));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestAxis0Neg2Dyn) {
+ test(get_initial_function({DYN, 256, 7, 7}, {DYN, 1, 2, 1}, {DYN, 256, 7, 7}, {DYN, 256, 7, 7}, 0));
+}
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <memory>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_shapeof3.hpp>
+#include <transformations/init_node_info.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+
+TEST(TransformationTests, ConvertShapeOf3WithI64) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 2, 3});
+ auto shapeof = std::make_shared<ngraph::opset3::ShapeOf>(input, ngraph::element::i64);
+ shapeof->set_friendly_name("shapeof");
+
+ f = std::make_shared<ngraph::Function>(ngraph::NodeVector{shapeof}, ngraph::ParameterVector{input});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertShapeOf3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 2, 3});
+ auto shapeof = std::make_shared<ngraph::opset1::ShapeOf>(input);
+ shapeof->set_friendly_name("shapeof");
+
+ f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{shapeof}, ngraph::ParameterVector{input});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto output_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(output_node->get_friendly_name() == "shapeof") << "Transformation ConvertShapeOf3 should keep output names.\n";
+}
+
+TEST(TransformationTests, ConvertShapeOf3WithI32) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 2, 3});
+ auto shapeof = std::make_shared<ngraph::opset3::ShapeOf>(input, ngraph::element::i32);
+ shapeof->set_friendly_name("shapeof");
+
+ f = std::make_shared<ngraph::Function>(ngraph::NodeVector{shapeof}, ngraph::ParameterVector{input});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertShapeOf3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 2, 3});
+ auto shapeof = std::make_shared<ngraph::opset1::ShapeOf>(input);
+ auto convert = std::make_shared<ngraph::opset1::Convert>(shapeof, ngraph::element::i32);
+ convert->set_friendly_name("shapeof");
+
+ f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{convert}, ngraph::ParameterVector{input});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto output_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(output_node->get_friendly_name() == "shapeof") << "Transformation ConvertShapeOf3 should keep output names.\n";
+}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <memory>
+#include <queue>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset2.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_topk3.hpp>
+#include <transformations/init_node_info.hpp>
+#include <transformations/utils/utils.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+
+// check that the first output from the TopK-3 with I32 output indices is equal to the TopK-1 first output
+TEST(TransformationTests, ConvertTopK3I32Output0) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset3::TopK>(input, k, 1, "min", "value", ngraph::element::i32);
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f = std::make_shared<ngraph::Function>(ngraph::OutputVector{topk->output(0)}, ngraph::ParameterVector{input});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertTopK3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset2::TopK>(input, k, 1, "min", "value", ngraph::element::i32);
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f_ref = std::make_shared<ngraph::Function>(ngraph::OutputVector{topk->output(0)}, ngraph::ParameterVector{input});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto topk_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(topk_node->get_friendly_name() == "topk") << "Transformation ConvertTopK3 should keep output names.\n";
+}
+
+// check that the second output from the TopK-3 with I32 output indices is equal to the TopK-1 second output
+TEST(TransformationTests, ConvertTopK3I32Output1) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset3::TopK>(input, k, 1, "min", "value", ngraph::element::i32);
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f = std::make_shared<ngraph::Function>(ngraph::OutputVector{topk->output(1)}, ngraph::ParameterVector{input});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertTopK3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset2::TopK>(input, k, 1, "min", "value", ngraph::element::i32);
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f_ref = std::make_shared<ngraph::Function>(ngraph::OutputVector{topk->output(1)}, ngraph::ParameterVector{input});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto topk_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(topk_node->get_friendly_name() == "topk") << "Transformation ConvertTopK3 should keep output names.\n";
+}
+
+// check that the first output from the TopK-3 with I64 output indices is equal to the TopK-1 first output
+TEST(TransformationTests, ConvertTopK3I64Output0) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset3::TopK>(input, k, 1, "min", "value", ngraph::element::i64);
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f = std::make_shared<ngraph::Function>(ngraph::OutputVector{topk->output(0)}, ngraph::ParameterVector{input});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertTopK3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset2::TopK>(input, k, 1, "min", "value", ngraph::element::i32);
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f_ref = std::make_shared<ngraph::Function>(ngraph::OutputVector{topk->output(0)}, ngraph::ParameterVector{input});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto topk_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(topk_node->get_friendly_name() == "topk") << "Transformation ConvertTopK3 should keep output names.\n";
+}
+
+// check that the second output from the TopK-3 with I64 output indices is equal to the TopK-1 second output converted to I64
+TEST(TransformationTests, ConvertTopK3I64Output1) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset3::TopK>(input, k, 1, "min", "value", ngraph::element::i64);
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f = std::make_shared<ngraph::Function>(ngraph::OutputVector{topk->output(1)}, ngraph::ParameterVector{input});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertTopK3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset2::TopK>(input, k, 1, "min", "value", ngraph::element::i32);
+ auto convert = std::make_shared<ngraph::opset2::Convert>(topk->output(1), topk->get_index_element_type());
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{convert}, ngraph::ParameterVector{input});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto topk_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+}
}
for (int i = 0; i < node1->inputs().size(); ++i) {
- if (node1->input(i).get_shape() != node2->input(i).get_shape()) {
+ if (!node1->input(i).get_partial_shape().compatible(node2->input(i).get_partial_shape())) {
std::ostringstream out("Different shape detected");
- out << node1->input(i).get_shape() << " and " << node2->input(i).get_shape();
+ out << node1->input(i).get_partial_shape() << " and " << node2->input(i).get_partial_shape();
return {false, out.str()};
}
#include <memory>
#include <ngraph/function.hpp>
+#include <ngraph/dimension.hpp>
#include "common_test_utils/test_common.hpp"
+#define DYN ngraph::Dimension::dynamic()
+
using TransformationTests = CommonTestUtils::TestsCommon;
std::pair<bool, std::string> compare_functions(const std::shared_ptr<ngraph::Function> & f1, const std::shared_ptr<ngraph::Function> & f2);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include "common_test_utils/test_common.hpp"
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <memory>
+#include <queue>
+#include <map>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/pass/constant_folding.hpp>
+#include <ngraph_ops/fully_connected.hpp>
+#include <transformations/convert_opset1_to_legacy/fc_bias_fusion.hpp>
+#include <transformations/optimize_strided_slice.hpp>
+#include <transformations/utils/utils.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+
+// StridedSlice directly on a Parameter with zero begin and a fully masked-out
+// end (end_mask == 1 -> slice to the end).  The function is compared against
+// itself, so this checks only that the passes run cleanly on such a graph.
+TEST(TransformationTests, OptimizeSS_UselessDeletion_Negative1) {
+    std::shared_ptr<ngraph::Function> f(nullptr);
+    {
+        auto param = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+        auto begin_const = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end_const = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto strides_const = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+
+        const std::vector<int64_t> begin_mask(4, 0);
+        const std::vector<int64_t> end_mask(4, 1);  // ignoring end -- slicing to the end
+
+        auto slice = std::make_shared<ngraph::opset1::StridedSlice>(param, begin_const, end_const, strides_const, begin_mask, end_mask);
+
+        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{slice}, ngraph::ParameterVector{param});
+        ngraph::pass::StridedSliceOptimization().run_on_function(f);
+        ngraph::pass::ConstantFolding().run_on_function(f);
+    }
+
+    auto res = compare_functions(f, f);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// Same useless-slice pattern as Negative1, but the input shape is fully
+// dynamic (rank-4 only).  The function is compared against itself, so the
+// check is that the passes handle dynamic shapes without crashing.
+TEST(TransformationTests, OptimizeSS_UselessDeletion_Negative2) {
+    std::shared_ptr<ngraph::Function> f(nullptr);
+    {
+        // rank is static (4) but every dimension is dynamic
+        auto data = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::PartialShape::dynamic(4));
+        auto relu = std::make_shared<ngraph::opset1::Relu>(data);
+        auto begin = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+
+        std::vector<int64_t> begin_mask = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask = {1, 1, 1, 1}; // ignoring end -- slicing to the end
+
+        auto ss = std::make_shared<ngraph::opset1::StridedSlice>(relu, begin, end, stride, begin_mask, end_mask);
+
+        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{ss}, ngraph::ParameterVector{data});
+        ngraph::pass::StridedSliceOptimization().run_on_function(f);
+        ngraph::pass::ConstantFolding().run_on_function(f);
+    }
+
+    auto res = compare_functions(f, f);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// A useless StridedSlice (zero begin, end ignored via end_mask) fed by a Relu
+// is expected to be removed: the optimized function must equal plain
+// Parameter -> Relu.
+TEST(TransformationTests, OptimizeSS_UselessDeletion) {
+    std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+    {
+        auto data = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+        auto relu = std::make_shared<ngraph::opset1::Relu>(data);
+        auto begin = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+
+        std::vector<int64_t> begin_mask = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask = {1, 1, 1, 1}; // ignoring end -- slicing to the end
+
+        auto ss = std::make_shared<ngraph::opset1::StridedSlice>(relu, begin, end, stride, begin_mask, end_mask);
+
+        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{ss}, ngraph::ParameterVector{data});
+        ngraph::pass::StridedSliceOptimization().run_on_function(f);
+        ngraph::pass::ConstantFolding().run_on_function(f);
+    }
+    {
+        // reference: the slice is gone entirely
+        auto data = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+        auto relu = std::make_shared<ngraph::opset1::Relu>(data);
+        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{relu}, ngraph::ParameterVector{data});
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// With both masks zero the end values (-1) are honored, so the slice actually
+// crops data and must be kept: optimized graph == untouched graph.
+TEST(TransformationTests, OptimizeSS_Usefull_Test) {
+    // Both the tested and the reference graph are structurally identical,
+    // so build them with one helper instead of two copy-pasted scopes.
+    auto build = []() -> std::shared_ptr<ngraph::Function> {
+        auto param = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+        auto begin = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto strides = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+
+        std::vector<int64_t> zero_mask = {0, 0, 0, 0};  // masks off: begin/end are used as-is
+
+        auto slice = std::make_shared<ngraph::opset1::StridedSlice>(param, begin, end, strides, zero_mask, zero_mask);
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{slice}, ngraph::ParameterVector{param});
+    };
+
+    std::shared_ptr<ngraph::Function> f = build();
+    ngraph::pass::StridedSliceOptimization().run_on_function(f);
+    ngraph::pass::ConstantFolding().run_on_function(f);
+
+    std::shared_ptr<ngraph::Function> f_ref = build();
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// Two byte-identical StridedSlices reading the same source are expected to be
+// deduplicated into a single slice feeding both Concat inputs.
+TEST(TransformationTests, OptimizeSS_Shared_Test) {
+    std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+    {
+        auto source = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+
+        // first slice of `source`
+        auto begin1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask1 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask1 = {0, 0, 0, 0};
+        auto ss1 = std::make_shared<ngraph::opset1::StridedSlice>(source, begin1, end1, stride1, begin_mask1, end_mask1);
+
+        // second slice: same attributes, separate nodes
+        auto begin2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask2 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask2 = {0, 0, 0, 0};
+        auto ss2 = std::make_shared<ngraph::opset1::StridedSlice>(source, begin2, end2, stride2, begin_mask2, end_mask2);
+
+        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::NodeVector{ss1, ss2}, 0);
+
+        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{concat}, ngraph::ParameterVector{source});
+        ngraph::pass::StridedSliceOptimization().run_on_function(f);
+        ngraph::pass::ConstantFolding().run_on_function(f);
+    }
+    {
+        // reference: one shared slice consumed twice by the Concat
+        auto source = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+
+        auto begin1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask1 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask1 = {0, 0, 0, 0};
+        auto ss1 = std::make_shared<ngraph::opset1::StridedSlice>(source, begin1, end1, stride1, begin_mask1, end_mask1);
+
+        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::NodeVector{ss1, ss1}, 0);
+
+        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{concat}, ngraph::ParameterVector{source});
+    }
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// The two slices have identical attributes but read *different* outputs of a
+// Split, so they must NOT be merged: the reference graph equals the initial one.
+TEST(TransformationTests, OptimizeSS_NotShared_Test) {
+    std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+    {
+        auto source = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 6, 5, 5});
+
+        // split channels (axis 1) into two 3-channel halves
+        auto axis = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1});
+        auto split = std::make_shared<ngraph::opset1::Split>(source, axis, 2);
+
+        auto begin1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask1 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask1 = {0, 0, 0, 0};
+        auto ss1 = std::make_shared<ngraph::opset1::StridedSlice>(split->output(0), begin1, end1, stride1, begin_mask1, end_mask1);
+
+        auto begin2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask2 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask2 = {0, 0, 0, 0};
+        auto ss2 = std::make_shared<ngraph::opset1::StridedSlice>(split->output(1), begin2, end2, stride2, begin_mask2, end_mask2);
+
+        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::NodeVector{ss1, ss2}, 0);
+
+        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{concat}, ngraph::ParameterVector{source});
+        ngraph::pass::StridedSliceOptimization().run_on_function(f);
+        ngraph::pass::ConstantFolding().run_on_function(f);
+    }
+    {
+        // reference: byte-for-byte the same graph as above, untouched
+        auto source = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 6, 5, 5});
+
+        auto axis = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1});
+        auto split = std::make_shared<ngraph::opset1::Split>(source, axis, 2);
+
+        auto begin1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask1 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask1 = {0, 0, 0, 0};
+        auto ss1 = std::make_shared<ngraph::opset1::StridedSlice>(split->output(0), begin1, end1, stride1, begin_mask1, end_mask1);
+
+        auto begin2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask2 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask2 = {0, 0, 0, 0};
+        auto ss2 = std::make_shared<ngraph::opset1::StridedSlice>(split->output(1), begin2, end2, stride2, begin_mask2, end_mask2);
+
+        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::NodeVector{ss1, ss2}, 0);
+
+        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{concat}, ngraph::ParameterVector{source});
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// Two complementary crops along axis 1 (channels 0..3 and 3..5) that are
+// concatenated back together are expected to be rewritten as a single
+// VariadicSplit with sizes {3, 2}.
+TEST(TransformationTests, OptimizeSS_Groupped_Test) {
+    std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+    {
+        auto source = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+
+        // first 3 channels
+        auto begin1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {5, 3, 5, 5});
+        auto stride1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask1 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask1 = {0, 0, 0, 0};
+        auto ss1 = std::make_shared<ngraph::opset1::StridedSlice>(source, begin1, end1, stride1, begin_mask1, end_mask1);
+
+        // remaining 2 channels
+        auto begin2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 3, 0, 0});
+        auto end2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {5, 5, 5, 5});
+        auto stride2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask2 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask2 = {0, 0, 0, 0};
+        auto ss2 = std::make_shared<ngraph::opset1::StridedSlice>(source, begin2, end2, stride2, begin_mask2, end_mask2);
+
+        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::NodeVector{ss1, ss2}, 1);
+
+        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{concat}, ngraph::ParameterVector{source});
+        ngraph::pass::StridedSliceOptimization().run_on_function(f);
+        ngraph::pass::ConstantFolding().run_on_function(f);
+    }
+    {
+        // reference: VariadicSplit {3, 2} over axis 1 instead of the two slices
+        auto source = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+
+        auto axis = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1});
+        auto split_sizes = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {3, 2});
+        auto variadic_split = std::make_shared<ngraph::opset1::VariadicSplit>(source, axis, split_sizes);
+
+        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{variadic_split->output(0), variadic_split->output(1)}, 1);
+
+        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{concat}, ngraph::ParameterVector{source});
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include "common_test_utils/test_common.hpp"
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <memory>
+#include <queue>
+#include <map>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/pass/constant_folding.hpp>
+#include <transformations/utils/utils.hpp>
+#include <transformations/init_node_info.hpp>
+#include <ngraph/pass/algebraic_simplification.hpp>
+#include <ngraph/pass/visualize_tree.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+
+using InputShape = ngraph::PartialShape;
+using TransposeOrder = std::vector<int64_t>;
+
+// Describes the expected outcome for one Transpose->Reshape test case:
+// either the graph stays unchanged, the Transpose vanishes entirely, or it is
+// replaced by a Reshape (with an explicit constant or a ShapeOf+Gather input).
+struct ReferenceParams {
+    bool no_changes = false;             // reference == initial graph (Transpose kept)
+    bool is_empty = false;               // Transpose removed, no Reshape inserted
+    std::vector<int64_t> reshape_value;  // non-empty: constant target dims for the Reshape
+
+    ReferenceParams() = default;
+
+    explicit ReferenceParams(bool no_changes, bool is_empty) : no_changes(no_changes), is_empty(is_empty) {}
+
+    explicit ReferenceParams(const std::vector<int64_t> & reshape_value): reshape_value(reshape_value) {}
+};
+
+// Parameterized fixture: builds a (shape, transpose-order) graph plus the
+// graph expected after AlgebraicSimplification rewrites the Transpose,
+// as encoded by ReferenceParams.
+class TransposeToReshapeTests: public CommonTestUtils::TestsCommon,
+                               public testing::WithParamInterface<std::tuple<InputShape, TransposeOrder, ReferenceParams> > {
+public:
+    std::shared_ptr<ngraph::Function> f, f_ref;  // function under test / expected result
+
+    void SetUp() override {
+        const auto& input_shape = std::get<0>(GetParam());
+        const auto& transpose_order = std::get<1>(GetParam());
+        const auto& reference_params = std::get<2>(GetParam());
+
+        f = get_initial_function(input_shape, transpose_order);
+        f_ref = get_reference_function(input_shape, transpose_order, reference_params);
+    }
+
+private:
+    // Parameter -> Transpose(order) -> Relu.
+    std::shared_ptr<ngraph::Function> get_initial_function(const ngraph::PartialShape & input_shape,
+                                                           const std::vector<int64_t> & transpose_order) {
+        auto data = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, input_shape);
+        auto order_const = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{transpose_order.size()}, transpose_order);
+        auto transpose = std::make_shared<ngraph::opset3::Transpose>(data, order_const);
+
+        // WA to test cases with transpose elimination: the Relu keeps the
+        // function non-trivial when the Transpose itself disappears.
+        auto relu = std::make_shared<ngraph::opset3::Relu>(transpose);
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{relu}, ngraph::ParameterVector{data});
+    }
+
+    // Expected graph after the transformation:
+    //   no_changes           -> identical to the initial function;
+    //   is_empty             -> Parameter -> Relu (Transpose dropped);
+    //   reshape_value set    -> Reshape with that constant target shape;
+    //   otherwise            -> Reshape fed by ShapeOf + Gather(transpose_order).
+    std::shared_ptr<ngraph::Function> get_reference_function(const ngraph::PartialShape & input_shape,
+                                                             const std::vector<int64_t> & transpose_order,
+                                                             const ReferenceParams & params) {
+        if (params.no_changes) {
+            return get_initial_function(input_shape, transpose_order);
+        }
+
+        auto data = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, input_shape);
+
+        ngraph::Output<ngraph::Node> reshape_dims, last(data);
+        if (!params.reshape_value.empty()) {
+            reshape_dims = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{params.reshape_value.size()}, params.reshape_value);
+        } else {
+            // dynamic case: target dims gathered from the runtime shape
+            auto shape_of = std::make_shared<ngraph::opset3::ShapeOf>(data);
+            reshape_dims = std::make_shared<ngraph::opset3::Gather>(shape_of,
+                ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{transpose_order.size()}, transpose_order),
+                ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0}));
+        }
+
+        if (!params.is_empty) {
+            last = std::make_shared<ngraph::opset3::Reshape>(last, reshape_dims, true);
+        }
+
+        last = std::make_shared<ngraph::opset3::Relu>(last);
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{last.get_node_shared_ptr()}, ngraph::ParameterVector{data});
+    }
+};
+
+// Runs InitNodeInfo + AlgebraicSimplification on the initial function, checks
+// that runtime info survives, and compares against the expected graph.
+TEST_P(TransposeToReshapeTests, CompareFunctions) {
+    ngraph::pass::InitNodeInfo().run_on_function(f);
+    ngraph::pass::AlgebraicSimplification().run_on_function(f);
+    ASSERT_NO_THROW(check_rt_info(f));
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// Shorthands for the three possible outcomes (undefined again below).
+#define SAME_FUNCTION ReferenceParams(true, false)
+#define EMPTY_FUNCTION ReferenceParams(false, true)
+#define SHAPE_OF_GATHER ReferenceParams()
+
+// Orders that genuinely permute non-degenerate dimensions: Transpose stays.
+INSTANTIATE_TEST_CASE_P(KeepTranspose, TransposeToReshapeTests,
+        testing::Values(std::make_tuple(InputShape{1, 3, 64, 64}, TransposeOrder{0, 1, 3, 2}, SAME_FUNCTION),
+                        std::make_tuple(InputShape{1, 3, 1, 64},  TransposeOrder{2, 0, 3, 1}, SAME_FUNCTION),
+                        std::make_tuple(InputShape{1, 3, 1, 3},   TransposeOrder{3, 0, 2, 1}, SAME_FUNCTION),
+                        std::make_tuple(InputShape{DYN, 2, 64, 1}, TransposeOrder{1, 0, 3, 2}, SAME_FUNCTION),
+                        std::make_tuple(InputShape{DYN, 3},       TransposeOrder{1, 0}, SAME_FUNCTION),
+                        std::make_tuple(InputShape{DYN, DYN, 1},  TransposeOrder{2, 1, 0}, SAME_FUNCTION),
+                        std::make_tuple(InputShape{DYN, DYN},     TransposeOrder{1, 0}, SAME_FUNCTION)));
+
+// Identity orders (or permutations of all-ones dims): Transpose is removed.
+INSTANTIATE_TEST_CASE_P(EliminateTranspose, TransposeToReshapeTests,
+        testing::Values(std::make_tuple(InputShape{1, 3, 64, 64}, TransposeOrder{0, 1, 2, 3}, EMPTY_FUNCTION),
+                        std::make_tuple(InputShape{1, 1, 1},      TransposeOrder{2, 0, 1}, EMPTY_FUNCTION),
+                        std::make_tuple(InputShape{DYN, DYN},     TransposeOrder{0, 1}, EMPTY_FUNCTION)));
+
+// Only unit dimensions move: Transpose becomes a Reshape with a constant shape.
+INSTANTIATE_TEST_CASE_P(ReshapeWithConstant, TransposeToReshapeTests,
+        testing::Values(std::make_tuple(InputShape{1, 3, 64, 1},  TransposeOrder{0, 1, 3, 2}, ReferenceParams({1, 3, 1, 64})),
+                        std::make_tuple(InputShape{1, 3, 1, 64},  TransposeOrder{1, 0, 3, 2}, ReferenceParams({3, 1, 64, 1})),
+                        std::make_tuple(InputShape{DYN, DYN, 1},  TransposeOrder{0, 2, 1}, ReferenceParams({0, 1, -1})),
+                        std::make_tuple(InputShape{1, 1, DYN},    TransposeOrder{2, 1, 0}, ReferenceParams({-1, 1, 1})),
+                        std::make_tuple(InputShape{DYN, 1, 64, 1}, TransposeOrder{1, 0, 3, 2}, ReferenceParams({1, -1, 1, 64}))));
+
+// Too many dynamic dims for a constant: Reshape gets its dims via ShapeOf+Gather.
+INSTANTIATE_TEST_CASE_P(ReshapeWithGather, TransposeToReshapeTests,
+        testing::Values(std::make_tuple(InputShape{DYN, 1, DYN, 1}, TransposeOrder{1, 0, 3, 2}, SHAPE_OF_GATHER),
+                        std::make_tuple(InputShape{1, DYN, DYN, DYN}, TransposeOrder{1, 2, 3, 0}, SHAPE_OF_GATHER)));
+
+#undef SAME_FUNCTION
+#undef EMPTY_FUNCTION
+#undef SHAPE_OF_GATHER
class BF16NetworkRestore1 : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// + Power1(FP32)
// |
// + AvgPooling1(FP32)
return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode2}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
*
* class ScaleshiftConv_x3_Eltwise : public BasicBF16Test {
* protected:
- * void SetUp()override {
+ * void SetUp() override {
* fnPtr = std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode3}, ngraph::ParameterVector{input1});
// STAGE1:
// BFloat16Helpers::getMaxAbsValue(lm1.as<const float *>(), mout1->size()) << std::endl;
// std::cout << "Max in fp32 network by output " << outputNameFP32 << ": " <<
// BFloat16Helpers::getMaxAbsValue(lm2.as<const float *>(), mout2->size()) << std::endl;
-
FuncTestUtils::compareRawBuffers(lm1.as<const float *>(),
lm2.as<const float *>(),
mout1->size(), mout2->size(),
+ FuncTestUtils::CompareType::ABS,
threshold);
-
// Stage2: verification of performance counters
std::pair<std::string, std::string> wrongLayer =
BFloat16Helpers::matchPerfCountPrecisionVsExpected(req1.GetPerformanceCounts(), expectedPrecisions);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+namespace {
+    // All inputs are NCHW 1xCx2x2; the channel count C sweeps 1..10 so the
+    // in-place concat is exercised for several channel widths.
+    // NOTE(review): "concatAxe" presumably means "concat axis" (channels).
+    static const int inputSize = 2, concatAxe = 1;
+    static std::vector<SizeVector> paramVector = {
+        SizeVector({ 1, 1, inputSize, inputSize }),
+        SizeVector({ 1, 2, inputSize, inputSize }),
+        SizeVector({ 1, 3, inputSize, inputSize }),
+        SizeVector({ 1, 4, inputSize, inputSize }),
+        SizeVector({ 1, 5, inputSize, inputSize }),
+        SizeVector({ 1, 6, inputSize, inputSize }),
+        SizeVector({ 1, 7, inputSize, inputSize }),
+        SizeVector({ 1, 8, inputSize, inputSize }),
+        SizeVector({ 1, 9, inputSize, inputSize }),
+        SizeVector({ 1, 10, inputSize, inputSize })};
+}  // namespace
+
+// BF16 test: two convolutions fed by the same scale-shift, concatenated over
+// the channel axis and followed by a Relu.  Checks both numerical accuracy
+// against the FP32 run and the per-layer execution precision counters.
+class Concat_in_place : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //     scaleshift
+        //      /      \
+        //   Conv      Conv
+        //      \      /
+        //       concat
+        //         |
+        //        relu
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply (constants are pre-rounded to bf16 when testing BF16)
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("ADD_1");
+
+        // convolution weights, shared by both convolutions
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        auto channelsCount = inputShapes[1];
+        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channel, /input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),         // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
+            ngraph::Strides({ 1, 1 }),         // dilation
+            ngraph::op::PadType::EXPLICIT);    // pad type
+        convNode1->set_friendly_name("CONV_1");
+
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),         // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
+            ngraph::Strides({ 1, 1 }),         // dilation
+            ngraph::op::PadType::EXPLICIT);    // pad type
+        convNode2->set_friendly_name("CONV_2");
+
+        // Concat over the channel axis
+        ngraph::NodeVector concInputNodes = { convNode1, convNode2 };
+
+        auto concNode = std::make_shared<opset1::Concat>(concInputNodes, concatAxe);
+        concNode->set_friendly_name("CONC_1_TEST");
+
+        // ReLU
+        auto reluNode = std::make_shared<opset1::Relu>(concNode);
+        reluNode->set_friendly_name("RELU_1");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
+    }
+
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1: accuracy threshold vs the FP32 run (10e-1 == 1.0)
+        threshold = 10e-1;
+        // STAGE2:
+        // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
+        // performance counters
+        expectedPrecisions["ADD_1"] = "FP32";
+        expectedPrecisions["CONV_1"] = "BF16";
+        expectedPrecisions["CONV_2"] = "BF16";
+        expectedPrecisions["CONC_1_TEST"] = "FP32";
+        expectedPrecisions["RELU_1"] = "FP32";
+    }
+};
+
+// Runs the shared BF16 comparison flow (accuracy + precision counters, see
+// BasicBF16Test::test) for every registered parameter combination.
+// Note: the stray ';' after the closing brace was removed -- it formed an
+// empty declaration that warns under -Wextra-semi/-pedantic.
+TEST_P(Concat_in_place, CompareWithRefImpl) {
+    test();
+}
+
+// FP32 network precision: baseline, everything executes in FP32.
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Concat_in_place,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::FP32),
+                                ::testing::ValuesIn(paramVector),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        Concat_in_place::getTestCaseName);
+
+// BF16 network precision: convolutions expected to run in BF16 (see SetUp).
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Concat_in_place,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::BF16),
+                                ::testing::ValuesIn(paramVector),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        Concat_in_place::getTestCaseName);
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+// BF16 test: Multiply (scale) -> Convolution -> eltwise Add -> Convolution.
+// Checks accuracy vs FP32 and the per-layer execution precision counters.
+class ConvAdd : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+//      Power (FP32)
+//        |
+//      Conv(BF16)
+//        |
+//      Eltwise (SUM)(BF16)
+//        |
+//      Conv (BF16)
+
+        auto channelsCount = inputShapes[1];
+
+        // STAGE1: construction of the GRAPH
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // scale constants, pre-rounded to bf16 when testing BF16
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> eltConst0 = nullptr, eltConst1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            eltConst0 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+            eltConst1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            eltConst0 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+            eltConst1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        // NOTE(review): despite the name, this is a Multiply node -- confirm
+        // whether "Add_0" is intentional.
+        auto addNode0 = std::make_shared<opset1::Multiply>(input1, eltConst0);
+        addNode0->set_friendly_name("Add_0");
+
+        // convolution weights (same sin-filled values for both convolutions)
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode0 = nullptr, weightsNode1 = nullptr;
+        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channel, /input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode0 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode0, weightsNode0,
+            ngraph::Strides({ 1, 1 }),         // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
+            ngraph::Strides({ 1, 1 }),         // dilation
+            ngraph::op::PadType::EXPLICIT);    // pad type
+        convNode0->set_friendly_name("Convolution_0");
+
+        // eltwise, i.e. sum
+        auto eltSumNode = std::make_shared<opset1::Add>(convNode0, eltConst1);
+        eltSumNode->set_friendly_name("Elt_sum");
+
+        // convolution
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            eltSumNode, weightsNode1,
+            ngraph::Strides({ 1, 1 }),         // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
+            ngraph::Strides({ 1, 1 }),         // dilation
+            ngraph::op::PadType::EXPLICIT);    // pad type
+        convNode1->set_friendly_name("Convolution_1");
+
+        return std::make_shared<ngraph::Function>(convNode1, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE2: set up safe threshold <= 5% from maximum value of output tensor
+
+        // 256 channels
+        // threshold = 0.26f;  // Max in fp32 network by output: 5.26852
+
+        // 3 channels
+        threshold = 0.2f;  // Max in fp32 network by output: 4.90418
+
+        // STAGE3:
+        // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
+        // performance counters
+        expectedPrecisions["Convolution_0"] = "BF16";
+        expectedPrecisions["Convolution_1"] = "BF16";
+        expectedPrecisions["Elt_sum"] = "FP32";
+    }
+};
+
+// Runs the shared BF16 comparison flow (accuracy + precision counters, see
+// BasicBF16Test::test) for every registered parameter combination.
+// Note: the stray ';' after the closing brace was removed -- it formed an
+// empty declaration that warns under -Wextra-semi/-pedantic.
+TEST_P(ConvAdd, CompareWithRefImpl) {
+    test();
+}
+
+// NOTE(review): the 1x256x38x38 variants below are disabled because the CPU
+// plug-in fails on that configuration -- re-enable once fixed.
+
+//INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ConvAdd,
+//                        ::testing::Combine(
+//                                ::testing::Values(Precision::FP32),
+//                                ::testing::Values(Precision::FP32),
+//                                ::testing::Values(SizeVector({1, 256, 38, 38})),
+//                                ::testing::Values(SizeVector()),
+//                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+//                        ConvAdd::getTestCaseName);
+//
+//INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ConvAdd,
+//                        ::testing::Combine(
+//                                ::testing::Values(Precision::FP32),
+//                                ::testing::Values(Precision::BF16),
+//                                ::testing::Values(SizeVector({1, 256, 38, 38})),
+//                                ::testing::Values(SizeVector()),
+//                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+//                        ConvAdd::getTestCaseName);
+
+// Active 3-channel variants.
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ConvAdd,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(SizeVector({1, 3, 38, 38})),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ConvAdd::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ConvAdd,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::BF16),
+                                ::testing::Values(SizeVector({1, 3, 38, 38})),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ConvAdd::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
class ConvConv : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// ScaleShift (FP32)
// |
// Conv (BF16)
// |
// Conv (BF16)
+ ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{inputShapes});
auto const1 = opset1::Constant::create(ngraph::element::f32, Shape{1}, { 2.0f });
auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
addNode->set_friendly_name("ADD_1");
// convolution
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
- std::vector<float> weightValues;
- weightValues.resize(3 * 3 * 3 * 3);
- BFloat16Helpers::fillInputsBySinValues(weightValues.data(), weightValues.size());
- auto weightsNode = std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, convFilterShape, weightValues);
+ std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+
+ auto channelsCount = inputShapes[1];
+
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ if (netPrecision == Precision::FP32) {
+ std::vector<float> weightValuesFP32;
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
+ BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+ weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+ } else {
+ std::vector<short> weightValuesBF16;
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
+ BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+ weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+ }
std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
addNode, weightsNode,
convNode1->set_friendly_name("CONV_1");
// Convolution
- ngraph::Shape convFilterShape2 = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
std::vector<float> weightValues2;
- weightValues2.resize(3 * 3 * 3 * 3);
+ weightValues2.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2.data(), weightValues2.size());
auto weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, convFilterShape2, weightValues2);
std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
// STAGE1:
- // the maximum values in the latest tensor for this test is 24.4. It would be safe to set threshold eq to 0.1
- threshold = 0.3f;
+ threshold = 1.0f; // Max in fp32 network by output CONV_2: 49.3427
// STAGE2:
// filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in
// performance counters
class ConvDWConvReLU : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
// |
// Conv (BF16)
// |
// ReLU (Fused Info DW convolution)
-
// multiply
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
addNode->set_friendly_name("ADD_1");
// convolution
+ auto channelsCount = inputShapes[1];
+
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
// DW convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
- ngraph::Shape convFilterShape2 = { 3, 1, 1, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape2 = { channelsCount, 1, 1, 3, 3 };
if (netPrecision == Precision::FP32) {
std::vector<float> weightValues2FP32;
- weightValues2FP32.resize(3 * 1 * 1 * 3 * 3);
+ weightValues2FP32.resize(channelsCount * 1 * 1 * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2FP32.data(), weightValues2FP32.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2FP32);
} else {
std::vector<short> weightValues2BF16;
- weightValues2BF16.resize(3 * 1 * 1 * 3 * 3);
+ weightValues2BF16.resize(channelsCount * 1 * 1 * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
}
return std::make_shared<ngraph::Function>(reluNode2, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+typedef std::tuple< Precision, SizeVector, string, size_t, CoordinateDiff, string> convEltwiseDepthwiseTestParamsSet;
+
+// Checks BF16 execution of a Convolution with a fused eltwise (ReLU) and a
+// fused depthwise (per-channel ScaleShift) post-op: output must match the
+// FP32 reference within `threshold`, and the CONV layer must be executed by
+// the expected mkldnn primitive (verified via performance counters).
+class ConvEltwiseDepthwise :
+    public testing::WithParamInterface<convEltwiseDepthwiseTestParamsSet>, public LayerTestsUtils::LayerTestsCommon {
+public:
+    std::shared_ptr<Function> fnPtr;              // graph under test, built in SetUp()
+    SizeVector inputShapes;                       // NCHW input shape
+    std::map<string, string> expectedPrecisions;  // layer name -> expected runtime precision/primitive
+    float threshold = 3e-2f;                      // abs/rel tolerance for the BF16-vs-FP32 comparison
+    Precision netPrecision;                       // precision the graph constants are created in
+    size_t kernel;                                // square convolution kernel size
+    CoordinateDiff pads;                          // symmetric convolution padding
+    string mkldnnPrimitive;                       // mkldnn primitive expected for the CONV layer
+
+protected:
+    // Builds the test graph:
+    //   scaleshift (FP32)
+    //       |
+    //   Conv (BF16)
+    //       |
+    //   Relu (Eltwise fused into Conv)
+    //       |
+    //   scaleshift (Depthwise fused into Conv)
+    std::shared_ptr<Function> createGraph(InferenceEngine::Precision netPrecision) {
+        element::Type ntype = (netPrecision == Precision::FP32) ? element::f32 : element::bf16;
+        size_t chCnt = inputShapes[1];  // the convolution keeps the channel count unchanged
+
+        // first scaleshift: multiply by a scalar constant ...
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, Shape{ inputShapes });
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{ 1 }, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{ 1 }, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // ... then add a scalar constant
+        std::shared_ptr<opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{ 1 }, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{ 1 }, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("SS_1");
+
+        // convolution; weights are filled with sin values in the requested precision
+        std::shared_ptr<opset1::Constant> weightsNode = nullptr;
+        Shape convFilterShape = { chCnt, chCnt, kernel, kernel };  // out channels, in channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(chCnt * chCnt * kernel * kernel);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(chCnt * chCnt * kernel * kernel);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<Node> convNode1 = std::make_shared<opset1::Convolution>(
+            addNode, weightsNode, Strides({ 1, 1 }), pads, pads, Strides({ 1, 1 }), op::PadType::EXPLICIT);
+        convNode1->set_friendly_name("CONV");
+
+        // Eltwise, i.e. Relu
+        auto reluNode = std::make_shared<opset1::Relu>(convNode1);
+        reluNode->set_friendly_name("RELU");
+
+        // second scaleshift, depthwise: per-channel multiply ...
+        std::shared_ptr<opset1::Constant> const3 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const3 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 }, { 3.0f });
+        } else {
+            const3 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 }, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(3.0f)) });
+        }
+        auto mulNode2 = std::make_shared<opset1::Multiply>(reluNode, const3);
+
+        // ... then per-channel add
+        std::shared_ptr<opset1::Constant> const4 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const4 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 }, { 2.0f });
+        } else {
+            const4 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 }, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto addNode2 = std::make_shared<opset1::Add>(mulNode2, const4);
+        addNode2->set_friendly_name("SS_2");
+
+        return std::make_shared<Function>(NodeVector{ addNode2 }, ParameterVector{ input1 });
+    }
+public:
+    // Produces a readable test name from the parameter tuple.
+    static string getTestCaseName(testing::TestParamInfo<convEltwiseDepthwiseTestParamsSet> obj) {
+        Precision netPrecision;
+        SizeVector inputShapes;
+        string targetDevice;
+        size_t kernel;
+        CoordinateDiff pads;
+        string mkldnnPrimitive;
+        std::tie(netPrecision, inputShapes, targetDevice, kernel, pads, mkldnnPrimitive) = obj.param;
+
+        std::ostringstream result;
+        result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
+        result << "netPRC=" << netPrecision.name() << "_";
+        result << "mkldnnPrimitive=" << mkldnnPrimitive << "_";
+        result << "targetDevice=" << targetDevice;
+        return result.str();
+    }
+
+    // Runs the network twice — once with BF16 (enforced for an FP32-built
+    // graph) and once as an FP32 reference — compares the raw output buffers
+    // within `threshold`, then validates per-layer precisions/primitives
+    // against the performance counters.
+    void Run_test() {
+        if (!InferenceEngine::with_cpu_x86_bfloat16()) {
+            // on platforms which do not support bfloat16, we are disabling bf16 tests since there are no bf16 primitives,
+            // tests are useless on such platforms
+            return;
+        }
+        std::tie(netPrecision, inputShapes, targetDevice, kernel, pads, mkldnnPrimitive) = this->GetParam();
+        InferenceEngine::CNNNetwork cnnNet(fnPtr);
+
+        for (const auto& inputItem : cnnNet.getInputsInfo()) {
+            inputItem.second->setPrecision(Precision::FP32);
+        }
+
+        string inputName = cnnNet.getInputsInfo().begin()->first;
+        string outputName = cnnNet.getOutputsInfo().begin()->first;
+        auto ie = InferenceEngine::Core();
+        // BF16 inference: an FP32-built graph is force-converted to BF16 by the
+        // plug-in; a BF16-built graph already carries BF16 constants.
+        std::map<string, string> options;
+        if (netPrecision == InferenceEngine::Precision::FP32) {
+            options[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] = InferenceEngine::PluginConfigParams::YES;
+        } else {
+            options[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] = InferenceEngine::PluginConfigParams::NO;
+        }
+        options[InferenceEngine::PluginConfigParams::KEY_PERF_COUNT] = InferenceEngine::PluginConfigParams::YES;
+        options[InferenceEngine::PluginConfigParams::KEY_DUMP_EXEC_GRAPH_AS_DOT] = "egraph_test";
+
+        auto exec_net1 = ie.LoadNetwork(cnnNet, targetDevice, options);
+        auto req1 = exec_net1.CreateInferRequest();
+
+        InferenceEngine::Blob::Ptr inBlob1 = req1.GetBlob(inputName);
+        BFloat16Helpers::fillInputsBySinValues(inBlob1);
+
+        req1.Infer();
+        auto outBlobBF16 = req1.GetBlob(outputName);
+        InferenceEngine::MemoryBlob::CPtr mout1 = InferenceEngine::as<InferenceEngine::MemoryBlob>(outBlobBF16);
+        ASSERT_NE(mout1, nullptr);
+        auto lm1 = mout1->rmap();
+
+        // FP32 inference
+        // if netPrecision is not eq to the FP32 - change network precision and recreate network
+        InferenceEngine::CNNNetwork cnnNetFP32(createGraph(InferenceEngine::Precision::FP32));
+        string inputNameFP32 = cnnNetFP32.getInputsInfo().begin()->first;
+        string outputNameFP32 = cnnNetFP32.getOutputsInfo().begin()->first;
+        for (const auto& inputItem : cnnNetFP32.getInputsInfo()) {
+            inputItem.second->setPrecision(Precision::FP32);
+        }
+        auto exec_net2 = ie.LoadNetwork(cnnNetFP32, targetDevice,
+                                        { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO } });
+        auto req2 = exec_net2.CreateInferRequest();
+
+        req2.SetBlob(inputNameFP32, inBlob1);  // reuse the same input data for a fair comparison
+
+        req2.Infer();
+        auto outBlobFP32 = req2.GetBlob(outputNameFP32);
+        InferenceEngine::MemoryBlob::CPtr mout2 = InferenceEngine::as<InferenceEngine::MemoryBlob>(outBlobFP32);
+        ASSERT_NE(mout2, nullptr);
+        auto lm2 = mout2->rmap();
+
+        // Stage1: element-wise comparison of the two raw output buffers
+        FuncTestUtils::compareRawBuffers(lm1.as<const float*>(), lm2.as<const float*>(), mout1->size(), mout2->size(),
+                                         FuncTestUtils::CompareType::ABS_AND_REL,
+                                         threshold, threshold);
+
+        // Stage2: verification of performance counters
+        std::pair<string, string> wrongLayer =
+            BFloat16Helpers::matchPerfCountPrecisionVsExpected(req1.GetPerformanceCounts(), expectedPrecisions);
+        if (!wrongLayer.first.empty()) {
+            string layerInPerfCounts = wrongLayer.first + " " + wrongLayer.second;
+            string layerExpected = wrongLayer.first + " " + expectedPrecisions[wrongLayer.first];
+            ASSERT_EQ(layerInPerfCounts, layerExpected);
+        }
+        fnPtr.reset();
+    }
+
+    void SetUp() override {
+        std::tie(netPrecision, inputShapes, targetDevice, kernel, pads, mkldnnPrimitive) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // Expected runtime precision/primitive per layer; RELU and SS_2 are
+        // expected to be fused into CONV, hence "ndef" (no own perf counter).
+        expectedPrecisions["SS_1"] = "FP32";
+        expectedPrecisions["CONV"] = mkldnnPrimitive;
+        expectedPrecisions["RELU"] = "ndef";
+        expectedPrecisions["SS_2"] = "ndef";
+    }
+};
+
+// Entry point: executes Run_test() for each parameter combination.
+TEST_P(ConvEltwiseDepthwise, CompareWithRefImpl) {
+    Run_test();
+}
+
+// 1x1 kernel, no padding: expects the avx512 1x1 BF16 jit primitive.
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_1x1_depthwise_BF16, ConvEltwiseDepthwise,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(SizeVector({ 1, 5, 1, 1 })),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                            ::testing::Values(size_t(1)),
+                            ::testing::Values(CoordinateDiff({ 0, 0 })),
+                            ::testing::Values(string("jit_avx512_1x1_BF16"))),
+                        ConvEltwiseDepthwise::getTestCaseName);
+
+// 3x3 kernel with padding on a 3-channel input: expects the gemm BF16 primitive.
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_gemm_depthwise_BF16, ConvEltwiseDepthwise,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(SizeVector({ 1, 3, 10, 10 })),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                            ::testing::Values(size_t(3)),
+                            ::testing::Values(CoordinateDiff({ 1, 1 })),
+                            ::testing::Values(string("jit_gemm_BF16"))),
+                        ConvEltwiseDepthwise::getTestCaseName);
+
+// 3x3 kernel, no padding, 5 channels: expects the generic avx512 BF16 jit primitive.
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_conv_depthwise_BF16, ConvEltwiseDepthwise,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(SizeVector({ 1, 5, 10, 10 })),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                            ::testing::Values(size_t(3)),
+                            ::testing::Values(CoordinateDiff({ 0, 0 })),
+                            ::testing::Values(string("jit_avx512_BF16"))),
+                        ConvEltwiseDepthwise::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
class ConvReLUPoolConvReLUPool : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// Convolution1 (FP32)
// |
// ReLU1 (Fused)
// STAGE1: construction of the GRAPH
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
// convolution1
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
// convolution2
std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
- ngraph::Shape convFilterShape2 = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesBF16.data());
}
// convolution3
std::shared_ptr<ngraph::opset1::Constant> weightsNode3 = nullptr;
- ngraph::Shape convFilterShape3 = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape3 = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesBF16.data());
}
ngraph::op::PadType::EXPLICIT); // pad type
convNode3->set_friendly_name("Convolution_3");
-
-
-
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode3}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+// Checks BF16 inference of two convolutions separated by an eltwise Maximum
+// against a constant; per-layer precisions are validated via perf counters.
+class Elt_max : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+// Topology:
+//   Power (FP32)
+//     |
+//   Conv(BF16)  Const(FP32)
+//     |        /
+//   Eltwise(MAX)(FP32)
+//     |
+//   Conv(BF16)
+
+        // STAGE1: construction of the GRAPH
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        auto channelsCount = inputShapes[1];
+        // Convolution_0 squeezes the graph to a single channel; Convolution_1 consumes it.
+        const int conv0OutputChannels = 1;
+
+        // "Power" is modelled as a multiply by a scalar constant
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> powerConst = nullptr;
+        if (netPrecision == Precision::FP32) {
+            powerConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            powerConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto powerNode0 = std::make_shared<opset1::Multiply>(input1, powerConst);
+        powerNode0->set_friendly_name("Power_0");
+
+        // weights for both convolutions, filled with sin values in the requested precision
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode0 = nullptr, weightsNode1 = nullptr;
+        ngraph::Shape convFilterShape0 = { conv0OutputChannels, channelsCount, 3, 3 };  // out channels, in channels, kernel h, kernel w
+        ngraph::Shape convFilterShape1 = { 1, conv0OutputChannels, 3, 3 };  // out channels, in channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32_0, weightValuesFP32_1;
+            weightValuesFP32_0.resize(conv0OutputChannels * channelsCount * 3 * 3);
+            weightValuesFP32_1.resize(1 * conv0OutputChannels * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32_0.data(), weightValuesFP32_0.size());
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32_1.data(), weightValuesFP32_1.size());
+            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape0, weightValuesFP32_0);
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape1, weightValuesFP32_1);
+        } else {
+            std::vector<short> weightValuesBF16_0, weightValuesBF16_1;
+            weightValuesBF16_0.resize(conv0OutputChannels * channelsCount * 3 * 3);
+            weightValuesBF16_1.resize(1 * conv0OutputChannels * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16_0.data(), weightValuesBF16_0.size());
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16_1.data(), weightValuesBF16_1.size());
+            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape0, weightValuesBF16_0.data());
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape1, weightValuesBF16_1.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode0 = std::make_shared<ngraph::opset1::Convolution>(
+            powerNode0, weightsNode0,
+            ngraph::Strides({ 1, 1 }),         // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
+            ngraph::Strides({ 1, 1 }),         // dilation
+            ngraph::op::PadType::EXPLICIT);    // pad type
+        convNode0->set_friendly_name("Convolution_0");
+
+        // Eltwise, i.e. Max against a full-size constant tensor
+        // (3x3 kernel with stride 1 and pad 1 keeps the spatial size, so the
+        // constant mirrors the Convolution_0 output shape)
+        std::shared_ptr<ngraph::opset1::Constant> maxConst = nullptr;
+        auto batchSize = inputShapes[0];
+        auto heightSize = inputShapes[2];
+        auto widthSize = inputShapes[3];
+        if (netPrecision == Precision::FP32) {
+            maxConst = opset1::Constant::create(ntype, Shape{batchSize, conv0OutputChannels, heightSize, widthSize}, { 2.0f });
+        } else {
+            maxConst = opset1::Constant::create(ntype, Shape{batchSize, conv0OutputChannels, heightSize, widthSize},
+                                                { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        maxConst->set_friendly_name("Max_const");
+        auto eltMaxNode = std::make_shared<opset1::Maximum>(convNode0, maxConst);
+        eltMaxNode->set_friendly_name("Elt_max");
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            eltMaxNode, weightsNode1,
+            ngraph::Strides({ 1, 1 }),         // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
+            ngraph::Strides({ 1, 1 }),         // dilation
+            ngraph::op::PadType::EXPLICIT);    // pad type
+        convNode1->set_friendly_name("Convolution_1");
+
+        return std::make_shared<ngraph::Function>(convNode1, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE2: set up safe threshold <= 5% from maximum value of output tensor
+        threshold = 0.2f;  // Max in fp32 network by output: 20.0761
+
+        // STAGE3:
+        // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
+        // performance counters
+        expectedPrecisions["Convolution_0"] = "BF16";
+        expectedPrecisions["Convolution_1"] = "BF16";
+        expectedPrecisions["Elt_max"] = "FP32";
+    }
+};
+
+// Entry point: runs the shared BF16-vs-FP32 comparison flow for Elt_max.
+TEST_P(Elt_max, CompareWithRefImpl) {
+    test();
+}
+
+
+// Variant with the network built in FP32.
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Elt_max,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(SizeVector({1, 3, 40, 40})),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        Elt_max::getTestCaseName);
+
+// Variant with the network built directly with BF16 constants.
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Elt_max,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({1, 3, 40, 40})),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        Elt_max::getTestCaseName);
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class Elt_x3 : public BasicBF16Test {
+protected:
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+// Power (FP32)
+// / | \
+// Conv(BF16) Conv(BF16) Conv(BF16)
+// / | /
+// ----------------------------------------------
+// Eltwise(MAX)(FP32) Eltwise(Mul) (FP32)
+// | |
+// Conv(BF16) Conv(BF16)
+// \ /
+// Eltwise (SUM)(BF16)
+// |
+// Conv (BF16)
+
+ auto channelsCount = inputShapes[1];
+
+ // STAGE1: construction of the GRAPH
+ ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ // add
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+ input1->set_friendly_name("Input_1");
+ std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
+ if (netPrecision == Precision::FP32) {
+ addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+ } else {
+ addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+ }
+ auto addNode0 = std::make_shared<opset1::Multiply>(input1, addConst);
+ addNode0->set_friendly_name("Add_0");
+
+ // convolution
+ std::shared_ptr<ngraph::opset1::Constant> weightsNode0_1 = nullptr, weightsNode0_2 = nullptr,
+ weightsNode0_3 = nullptr, weightsNode1 = nullptr,
+ weightsNode2 = nullptr, weightsNode3 = nullptr;
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ if (netPrecision == Precision::FP32) {
+ std::vector<float> weightValuesFP32;
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
+ BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+ weightsNode0_1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+ weightsNode0_2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+ weightsNode0_3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+ weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+ weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+ weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+ } else {
+ std::vector<short> weightValuesBF16;
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
+ BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+ weightsNode0_1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+ weightsNode0_2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+ weightsNode0_3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+ weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+ weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+ weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+ }
+
+ std::shared_ptr<ngraph::Node> convNode0_1 = std::make_shared<ngraph::opset1::Convolution>(
+ addNode0, weightsNode0_1,
+ ngraph::Strides({ 1, 1 }), // strides
+ ngraph::CoordinateDiff({ 1, 1 }), // pad begin
+ ngraph::CoordinateDiff({ 1, 1 }), // pad end
+ ngraph::Strides({ 1, 1 }), // dilation
+ ngraph::op::PadType::EXPLICIT); // pad type
+ convNode0_1->set_friendly_name("Convolution_0_1");
+
+ std::shared_ptr<ngraph::Node> convNode0_2 = std::make_shared<ngraph::opset1::Convolution>(
+ addNode0, weightsNode0_2,
+ ngraph::Strides({ 1, 1 }), // strides
+ ngraph::CoordinateDiff({ 1, 1 }), // pad begin
+ ngraph::CoordinateDiff({ 1, 1 }), // pad end
+ ngraph::Strides({ 1, 1 }), // dilation
+ ngraph::op::PadType::EXPLICIT); // pad type
+ convNode0_2->set_friendly_name("Convolution_0_2");
+
+ std::shared_ptr<ngraph::Node> convNode0_3 = std::make_shared<ngraph::opset1::Convolution>(
+ addNode0, weightsNode0_3,
+ ngraph::Strides({ 1, 1 }), // strides
+ ngraph::CoordinateDiff({ 1, 1 }), // pad begin
+ ngraph::CoordinateDiff({ 1, 1 }), // pad end
+ ngraph::Strides({ 1, 1 }), // dilation
+ ngraph::op::PadType::EXPLICIT); // pad type
+ convNode0_3->set_friendly_name("Convolution_0_3");
+
+ // Eltwise, i.e. Mul
+ auto eltMulNode = std::make_shared<opset1::Multiply>(convNode0_1, convNode0_2);
+ eltMulNode->set_friendly_name("Elt_mul");
+
+ // Eltwise, i.e. Max
+ std::shared_ptr<ngraph::opset1::Constant> maxConst = nullptr;
+ if (netPrecision == Precision::FP32) {
+ maxConst = opset1::Constant::create(ntype, Shape{inputShapes}, { 2.0f });
+ } else {
+ maxConst = opset1::Constant::create(ntype, Shape{inputShapes}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+ }
+ auto eltMaxNode = std::make_shared<opset1::Maximum>(convNode0_3, maxConst);
+ eltMaxNode->set_friendly_name("Elt_max");
+
+ // convolution
+ std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+ eltMulNode, weightsNode1,
+ ngraph::Strides({ 1, 1 }), // strides
+ ngraph::CoordinateDiff({ 1, 1 }), // pad begin
+ ngraph::CoordinateDiff({ 1, 1 }), // pad end
+ ngraph::Strides({ 1, 1 }), // dilation
+ ngraph::op::PadType::EXPLICIT); // pad type
+ convNode1->set_friendly_name("Convolution_1");
+
+ std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+ eltMaxNode, weightsNode2,
+ ngraph::Strides({ 1, 1 }), // strides
+ ngraph::CoordinateDiff({ 1, 1 }), // pad begin
+ ngraph::CoordinateDiff({ 1, 1 }), // pad end
+ ngraph::Strides({ 1, 1 }), // dilation
+ ngraph::op::PadType::EXPLICIT); // pad type
+ convNode2->set_friendly_name("Convolution_2");
+
+ // eltwise, i.e. sum
+ auto eltSumNode = std::make_shared<opset1::Add>(convNode1, convNode2);
+ eltSumNode->set_friendly_name("Elt_sum");
+
+ // convolution
+ std::shared_ptr<ngraph::Node> convNode3 = std::make_shared<ngraph::opset1::Convolution>(
+ eltSumNode, weightsNode3,
+ ngraph::Strides({ 1, 1 }), // strides
+ ngraph::CoordinateDiff({ 1, 1 }), // pad begin
+ ngraph::CoordinateDiff({ 1, 1 }), // pad end
+ ngraph::Strides({ 1, 1 }), // dilation
+ ngraph::op::PadType::EXPLICIT); // pad type
+ convNode3->set_friendly_name("Convolution_3");
+
+ return std::make_shared<ngraph::Function>(convNode3, ngraph::ParameterVector{input1});
+ }
+    void SetUp() override {
+        // Read test parameters from gtest and build the nGraph function under test.
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE2: set up safe threshold <= 5% from maximum value of output tensor
+
+        // 256 channels, 38 x 38 size
+        // threshold = 0.6f; // Max in fp32 network by output: 12.0983
+
+        // 3 channels, 4 x 4 size
+        threshold = 20.6f; // Max in fp32 network by output: 879.077
+
+        // STAGE3:
+        // filling of expected precision of layer execution defined by precision of input tensor
+        // to the primitive and reflected in performance counters
+        expectedPrecisions["Convolution_1"] = "BF16";
+        expectedPrecisions["Convolution_2"] = "BF16";
+        expectedPrecisions["Convolution_3"] = "BF16";
+        expectedPrecisions["Elt_max"] = "FP32";
+        expectedPrecisions["Elt_mul"] = "FP32";
+        expectedPrecisions["Elt_sum"] = "ndef"; // NOTE(review): "ndef" appears to mean "precision not strictly defined" — confirm against BasicBF16Test
+    }
+};
+
+TEST_P(Elt_x3, CompareWithRefImpl) {
+ test();
+};
+
+// CPU plug-in failure for the 1x256x38x38 shape, so the following instantiations are disabled:
+
+//INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Elt_x3,
+// ::testing::Combine(
+// ::testing::Values(Precision::FP32),
+// ::testing::Values(Precision::FP32),
+// ::testing::Values(SizeVector({1, 256, 38, 38})),
+// ::testing::Values(SizeVector()),
+// ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+// Elt_x3::getTestCaseName);
+//
+//INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Elt_x3,
+// ::testing::Combine(
+// ::testing::Values(Precision::FP32),
+// ::testing::Values(Precision::BF16),
+// ::testing::Values(SizeVector({1, 256, 38, 38})),
+// ::testing::Values(SizeVector()),
+// ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+// Elt_x3::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Elt_x3,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(SizeVector({1, 3, 4, 4})),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Elt_x3::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Elt_x3,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::BF16),
+ ::testing::Values(SizeVector({1, 3, 4, 4})),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Elt_x3::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
class Faster100_5_1_1_Conv : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// Power (FP32)
// |
// Convolution (BF16)
         // STAGE1: construction of the GRAPH
+ auto channelsCount = inputShapes[1];
+
// multiply
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{10, 5, 1, 1});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// problematic convolution: 100x5x1x1
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 5, 5, 1, 1 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 1, 1 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValues;
- weightValues.resize(5 * 5 * 1 * 1, 0.f);
+ weightValues.resize(channelsCount * channelsCount * 1 * 1, 0.f);
weightValues[0] = 1.0f;
weightValues[7] = 1.0f;
weightValues[11] = 1.0f;
weightsNode = std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, convFilterShape, weightValues);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(5 * 5 * 1 * 1, BFloat16Helpers::reducePrecisionBitwiseS(0.0f));
+ weightValuesBF16.resize(channelsCount * channelsCount * 1 * 1, BFloat16Helpers::reducePrecisionBitwiseS(0.0f));
weightValuesBF16[0] = BFloat16Helpers::reducePrecisionBitwiseS(1.0f);
weightValuesBF16[7] = BFloat16Helpers::reducePrecisionBitwiseS(1.0f);
weightValuesBF16[11] = BFloat16Helpers::reducePrecisionBitwiseS(1.0f);
return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+// Verifies BF16 inference of a Gather fed by a MatMul ("FC"): Matmul_0 must execute in
+// BF16 while the trailing Multiply (Mul_1) stays in FP32 (see expectedPrecisions in SetUp).
+class Gather_multiply : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        // Topology:
+        //   Add (FP32)
+        //    |
+        //   FC (BF16)
+        //    |
+        //   Gather(BF16)   Const
+        //        \         /
+        //         Mul(FP32)
+
+        // STAGE1: construction of the GRAPH
+        // Network runs either fully in f32 or with bf16 constants, depending on netPrecision.
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        auto inputSize = inputShapes[1];
+
+        // input scaling ("Add_1"): implemented as Multiply by a scalar constant
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
+        if (netPrecision == Precision::FP32) {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto addNode0 = std::make_shared<opset1::Multiply>(input1, addConst);
+        addNode0->set_friendly_name("Add_1");
+
+        // matmul (the "FC" of the diagram), square weight matrix of constant 2.0
+        std::shared_ptr<ngraph::opset1::Constant> matmulConst0 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize}, { 2.0f });
+        } else {
+            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize},
+                                                    { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto matmulNode = std::make_shared<opset1::MatMul>(addNode0, matmulConst0);
+        matmulNode->set_friendly_name("Matmul_0");
+
+        // identity gather along axis 1: indices 0..inputSize-1 select every column
+        auto axesConst = opset1::Constant::create(ngraph::element::i64, Shape{1}, { 1 });
+        std::vector<size_t> gatherArray;
+        for (size_t i = 0; i < inputSize; i++) {
+            gatherArray.push_back(i);
+        }
+        auto indexesConst = opset1::Constant::create(ngraph::element::i64, Shape{inputSize}, gatherArray);
+        auto gatherNode = std::make_shared<opset1::Gather>(matmulNode, indexesConst, axesConst);
+        gatherNode->set_friendly_name("Gather_1");
+
+        // elementwise multiply by a constant of the full input shape
+        std::shared_ptr<ngraph::opset1::Constant> mulConst = nullptr;
+        if (netPrecision == Precision::FP32) {
+            mulConst = opset1::Constant::create(ntype, Shape{inputShapes}, { 2.0f });
+        } else {
+            mulConst = opset1::Constant::create(ntype, Shape{inputShapes}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(gatherNode, mulConst);
+        mulNode->set_friendly_name("Mul_1");
+
+        return std::make_shared<ngraph::Function>(mulNode, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE2: set up safe threshold <= 5% from maximum value of output tensor
+        threshold = 0.1f; // Max in fp32 network by output: 21.7285
+
+        // STAGE3:
+        // filling of expected precision of layer execution defined by precision of input tensor
+        // to the primitive and reflected in performance counters
+
+        expectedPrecisions["Matmul_0"] = "BF16";
+        expectedPrecisions["Mul_1"] = "FP32";
+    }
+};
+
+TEST_P(Gather_multiply, CompareWithRefImpl) {
+ test();
+};
+
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Gather_multiply,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(SizeVector({1, 4})),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Gather_multiply::getTestCaseName);
+// CPU plug-in failure in the BF16 case, so the following instantiation is disabled:
+
+//INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Gather_multiply,
+// ::testing::Combine(
+// ::testing::Values(Precision::FP32),
+// ::testing::Values(Precision::BF16),
+// ::testing::Values(SizeVector({1, 4})),
+// ::testing::Values(SizeVector()),
+// ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+// Gather_multiply::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+// Verifies BF16 inference of a graph with two Gathers, Mul, Add/ReLU branches joined by a
+// Concat and a second MatMul. expectedPrecisions in SetUp pins which primitives must run in
+// BF16 vs FP32 according to the CPU plug-in's bfloat16 pipeline.
+class Gather_x2_add_mul_relu_concat_matmul : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        // Topology:
+        //   Add (FP32)
+        //    |
+        //   FullyConnected (BF16)
+        //    /          |          \
+        //   Gather(FP32)  Gather(FP32)  Add (FP32)
+        //       \        /               /
+        //        Mul(FP32)         ReLU(FP32)
+        //            \             /
+        //             Concat(FP32)      Const
+        //                 \            /
+        //                  Matmul(BF16)
+
+        // STAGE1: construction of the GRAPH
+        // Network runs either fully in f32 or with bf16 constants, depending on netPrecision.
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // input scaling ("Add_1"): implemented as Multiply by a scalar constant
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+        auto inputSize = inputShapes[1];
+
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
+        if (netPrecision == Precision::FP32) {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto addNode0 = std::make_shared<opset1::Multiply>(input1, addConst);
+        addNode0->set_friendly_name("Add_1");
+
+        // matmul (the "FullyConnected" of the diagram)
+        std::shared_ptr<ngraph::opset1::Constant> matmulConst0 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize}, { 2.0f });
+        } else {
+            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize},
+                                                    { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto matmulNode = std::make_shared<opset1::MatMul>(addNode0, matmulConst0);
+        matmulNode->set_friendly_name("Matmul_0");
+
+        // two identity Gathers along axis 1 (indices 0..inputSize-1)
+        std::vector<size_t> gatherArray;
+        for (size_t i = 0; i < inputSize; i++) {
+            gatherArray.push_back(i);
+        }
+        auto axesConst = opset1::Constant::create(ngraph::element::i64, Shape{1}, { 1 });
+        auto indexesConst = opset1::Constant::create(ngraph::element::i64, Shape{inputSize}, gatherArray);
+        auto gatherNode1 = std::make_shared<opset1::Gather>(matmulNode, indexesConst, axesConst);
+        gatherNode1->set_friendly_name("Gather_1");
+
+        auto gatherNode2 = std::make_shared<opset1::Gather>(matmulNode, indexesConst, axesConst);
+        gatherNode2->set_friendly_name("Gather_2");
+
+        // multiply
+        auto mulNode = std::make_shared<opset1::Multiply>(gatherNode1, gatherNode2);
+        mulNode->set_friendly_name("Mul_1");
+
+        // second scaling branch feeding ReLU
+        auto addNode1 = std::make_shared<opset1::Multiply>(matmulNode, addConst);
+        // Fix of a copy-paste bug: the original repeated addNode0->set_friendly_name("Add_1")
+        // here, renaming the first Multiply a second time and leaving this node unnamed.
+        addNode1->set_friendly_name("Add_2");
+
+        // ReLU
+        auto reluNode = std::make_shared<opset1::Relu>(addNode1);
+        reluNode->set_friendly_name("Relu_1");
+
+        // Concat of the two branches along the feature axis
+        ngraph::NodeVector concInputNodes = {mulNode, reluNode};
+        auto concNode = std::make_shared<opset1::Concat>(concInputNodes, 1);
+        concNode->set_friendly_name("Conc_1");
+
+        // matmul over the concatenated (doubled) feature dimension
+        std::shared_ptr<ngraph::opset1::Constant> matmulConst1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            matmulConst1 = opset1::Constant::create(ntype, Shape{inputSize * 2, inputSize * 2}, { 2.0f });
+        } else {
+            matmulConst1 = opset1::Constant::create(ntype, Shape{inputSize * 2, inputSize * 2},
+                                                    { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto matmulNode1 = std::make_shared<opset1::MatMul>(concNode, matmulConst1);
+        matmulNode1->set_friendly_name("Matmul_1");
+
+        return std::make_shared<ngraph::Function>(matmulNode1, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE2: set up safe threshold <= 5% from maximum value of output tensor
+        threshold = 170.02f; // Max in fp32 network by output: 3887.11
+
+        // STAGE3:
+        // filling of expected precision of layer execution defined by precision of input tensor
+        // to the primitive and reflected in performance counters
+        expectedPrecisions["Matmul_0"] = "BF16";
+        expectedPrecisions["Mul_1"] = "FP32";
+        expectedPrecisions["Add_1"] = "FP32";
+        expectedPrecisions["Relu_1"] = "FP32";
+        expectedPrecisions["Conc_1"] = "FP32";
+        expectedPrecisions["Matmul_1"] = "BF16";
+    }
+};
+
+TEST_P(Gather_x2_add_mul_relu_concat_matmul, CompareWithRefImpl) {
+ test();
+};
+
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Gather_x2_add_mul_relu_concat_matmul,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(SizeVector({ 2048, 64 })),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Gather_x2_add_mul_relu_concat_matmul::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Gather_x2_add_mul_relu_concat_matmul,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::BF16),
+ ::testing::Values(SizeVector({ 2048, 64 })),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Gather_x2_add_mul_relu_concat_matmul::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+// Verifies that an Interpolate with mode = "linear" between two convolutions stays in FP32
+// while both convolutions execute in BF16 (see expectedPrecisions in SetUp).
+class Interpolation : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        // Convolution (BF16)
+        //     |
+        // Interpolation (in the case of mode = "linear") (FP32)
+        //     |
+        // Convolution (BF16)
+
+        // STAGE1: construction of the GRAPH
+        // Network runs either fully in f32 or with bf16 constants, depending on netPrecision.
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        auto channelsCount = inputShapes[1];
+
+        // input scaling ("Add_1"): implemented as Multiply by a scalar constant
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
+        if (netPrecision == Precision::FP32) {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Multiply>(input1, addConst);
+        addNode->set_friendly_name("Add_1");
+
+        // shared sin-filled weights for both convolutions
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode1 = nullptr, weightsNode2 = nullptr;
+        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channel, /input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode1,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("Convolution_1");
+
+        // interpolation: upscale H and W by 2x
+        // NOTE(review): static_cast<long> is 32-bit on LLP64 (Windows); int64_t would be safer — confirm
+        auto heightSize = static_cast<long>(inputShapes[2]);
+        auto weigthSize = static_cast<long>(inputShapes[3]);
+        std::vector<int64_t> outShape = {2 * heightSize, 2 * weigthSize};
+
+        auto interpolShape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, outShape);
+        ngraph::op::InterpolateAttrs attrs;
+        attrs.pads_begin.push_back(0);
+        attrs.pads_end.push_back(0);
+        attrs.axes = ngraph::AxisSet{2, 3};
+        attrs.align_corners = false;
+        attrs.mode = "linear";  // "linear" mode is what forces the FP32 fallback under test
+        attrs.antialias = false;
+        auto interpolNode = std::make_shared<opset1::Interpolate>(
+            convNode1,
+            interpolShape, attrs);
+        interpolNode->set_friendly_name("Interp");
+
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            interpolNode, weightsNode2,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("Convolution_2");
+        return std::make_shared<ngraph::Function>(convNode2, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE2: set up safe threshold <= 5% from maximum value of output tensor
+        threshold = 0.02f;  // Max in fp32 network by output: 2.531
+
+        // STAGE3:
+        // filling of expected precision of layer execution defined by precision of input tensor
+        // to the primitive and reflected in performance counters
+        expectedPrecisions["Convolution_1"] = "BF16";
+        expectedPrecisions["Interp"] = "FP32";
+        expectedPrecisions["Convolution_2"] = "BF16";
+    }
+};
+
+TEST_P(Interpolation, CompareWithRefImpl) {
+ test();
+};
+
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Interpolation,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(SizeVector({ 1, 1, 2, 2 })),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Interpolation::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Interpolation,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::BF16),
+ ::testing::Values(SizeVector({ 1, 1, 2, 2 })),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Interpolation::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
class MobileNet_ssd_with_branching : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift
// |
// Conv1 (FP32)
// Concat
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// Conv1
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
// DW convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
- ngraph::Shape convFilterShape2 = { 3, 1, 1, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape2 = { channelsCount, 1, 1, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValues2FP32;
- weightValues2FP32.resize(3 * 1 * 1 * 3 * 3);
+ weightValues2FP32.resize(channelsCount * 1 * 1 * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2FP32.data(), weightValues2FP32.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2FP32);
} else {
std::vector<short> weightValues2BF16;
- weightValues2BF16.resize(3 * 1 * 1 * 3 * 3);
+ weightValues2BF16.resize(channelsCount * 1 * 1 * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
}
return std::make_shared<ngraph::Function>(concNode, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+// Verifies that an Interpolate with mode = "nearest" (a Resample) between two convolutions
+// stays in FP32 while both convolutions execute in BF16 (see expectedPrecisions in SetUp).
+class Resample : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        // Convolution (BF16)
+        //     |
+        // Interpolation (Resample in the case of mode = "nearest") (FP32)
+        //     |
+        // Convolution (BF16)
+
+        // STAGE1: construction of the GRAPH
+
+        // Network runs either fully in f32 or with bf16 constants, depending on netPrecision.
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // input scaling ("Add_1"): implemented as Multiply by a scalar constant
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+        auto channelsCount = inputShapes[1];
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
+        if (netPrecision == Precision::FP32) {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Multiply>(input1, addConst);
+        addNode->set_friendly_name("Add_1");
+
+        // shared sin-filled weights for both convolutions
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode1 = nullptr, weightsNode2 = nullptr;
+        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channel, /input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode1,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("Convolution_1");
+
+        // interpolation: upscale H and W by 2x
+        // NOTE(review): static_cast<long> is 32-bit on LLP64 (Windows); int64_t would be safer — confirm
+        auto heightSize = static_cast<long>(inputShapes[2]);
+        auto weigthSize = static_cast<long>(inputShapes[3]);
+        std::vector<int64_t> outShape = {2 * heightSize, 2 * weigthSize};
+
+        auto interpolShape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, outShape);
+        ngraph::op::InterpolateAttrs attrs;
+        attrs.pads_begin.push_back(0);
+        attrs.pads_end.push_back(0);
+        attrs.axes = ngraph::AxisSet{2, 3};
+        attrs.align_corners = false;
+        attrs.mode = "nearest";  // "nearest" mode makes this Interpolate a Resample
+        attrs.antialias = false;
+        auto interpolNode = std::make_shared<opset1::Interpolate>(
+            convNode1,
+            interpolShape, attrs);
+        interpolNode->set_friendly_name("Interp");
+
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            interpolNode, weightsNode2,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("Convolution_2");
+        return std::make_shared<ngraph::Function>(convNode2, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE2: set up safe threshold <= 5% from maximum value of output tensor
+        threshold = 0.02f;  // Max in fp32 network by output: 2.35926
+
+        // STAGE3:
+        // filling of expected precision of layer execution defined by precision of input tensor
+        // to the primitive and reflected in performance counters
+        expectedPrecisions["Convolution_1"] = "BF16";
+        expectedPrecisions["Interp"] = "FP32";
+        expectedPrecisions["Convolution_2"] = "BF16";
+    }
+};
+
+TEST_P(Resample, CompareWithRefImpl) {
+ test();
+};
+
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Resample,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(SizeVector({ 1, 1, 2, 2 })),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Resample::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Resample,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::BF16),
+ ::testing::Values(SizeVector({ 1, 1, 2, 2 })),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Resample::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
class ScaleshiftConvEltwiseConv : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32) Conv (FP32)
// \ /
// Eltwise (Fused into Conv)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+ auto channelsCount = inputShapes[1];
+
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
// Convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
- ngraph::Shape convFilterShape2 = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValues2;
- weightValues2.resize(3 * 3 * 3 * 3);
+ weightValues2.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2.data(), weightValues2.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2);
} else {
std::vector<short> weightValues2BF16;
- weightValues2BF16.resize(3 * 3 * 3 * 3);
+ weightValues2BF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
// STAGE1:
- threshold = 0.2f; // max value in the latest tensor for FP32 network is 37.77
+ threshold = 1.0f; // max value in the latest tensor for FP32 network is 37.77
// STAGE2:
// filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in
// performance counters
class ScaleshiftConvEltwiseReluConv : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32) Conv (FP32_
// \ /
// Eltwise (Fused into conv)
// Conv (BF16)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
// Convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
- ngraph::Shape convFilterShape2 = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValues2;
- weightValues2.resize(3 * 3 * 3 * 3);
+ weightValues2.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2.data(), weightValues2.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2);
} else {
std::vector<short> weightValues2BF16;
- weightValues2BF16.resize(3 * 3 * 3 * 3);
+ weightValues2BF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
// STAGE1:
- threshold = 9e-2;
+ threshold = 1.0f; // Max in fp32 network by output CONV_2: 30.1374
// STAGE2:
// filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in
// performance counters
class ScaleshiftConvEltwiseScaleshift : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
// |
// Conv (BF16)
// scaleshift (FP32)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{addNode2}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class ScaleshiftConvEluConv : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
// |
// Conv (BF16)
// Conv (BF16)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
// performance counters
expectedPrecisions["ADD_1"] = "FP32";
expectedPrecisions["CONV_1"] = "BF16";
- expectedPrecisions["ELU_1"] = "FP32";
expectedPrecisions["CONV_2"] = "BF16";
}
};
class ScaleshiftConvRelu : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
// |
// Conv (BF16)
// relu (Fused into convolution)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class ScaleshiftConv_x2_ConcatRelu : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift
// / \
// Conv Conv
// relu
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
// Concat
ngraph::NodeVector concInputNodes = { convNode1, convNode2 };
-
- // test is to be failed, if axis == 1 - TODO
- auto concNode = std::make_shared<opset1::Concat>(concInputNodes, 2);
+ auto concNode = std::make_shared<opset1::Concat>(concInputNodes, 1);
concNode->set_friendly_name("CONC_1");
// ReLU
return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class ScaleshiftConv_x2_Eltwise : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
// / \
// Conv1 (BF16) Conv1 (BF16)
// eltwise (Fused into Conv1) produce FP32 output
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class ScaleshiftConv_x2_mixed1_Eltwise : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
// | |
// Conv1(BF16) Conv2(FP32)
// eltwise(Fused into Conv1)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class ScaleshiftConv_x2_mixed2_Eltwise : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
// | |
// Conv1 (FP32) Conv2 (Bf16)
// eltwise (Fused into Conv1)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto channelsCount = inputShapes[1];
+
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class ScaleshiftConv_x3_Eltwise : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
//
// / \
// Conv3 (BF16)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+ const int outChannels = 16;
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 16, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { outChannels, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(16 * 3 * 3 * 3);
+ weightValuesFP32.resize(outChannels * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(16 * 3 * 3 * 3);
+ weightValuesBF16.resize(outChannels * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
// Convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode3 = nullptr;
- ngraph::Shape convFilterShape3 = { 16, 16, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape3 = { outChannels, outChannels, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(16 * 16 * 3 * 3);
+ weightValuesFP32.resize(outChannels * outChannels * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(16 * 16 * 3 * 3);
+ weightValuesBF16.resize(outChannels * outChannels * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode3}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
// STAGE1:
- threshold = 1.0f; // max value in the latest tensor for FP32 network is 93.3
+ threshold = 2.0f; // max value in the latest tensor for FP32 network is 93.3
// STAGE2:
// filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in
class Scaleshift_x2_Conv_x2_Eltwise : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
//
// scaleshift (FP32) scaleshift (FP32)
// \ / \
// |
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode, convNode2}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class Scaleshift_x3_ConvEltwiseRelu : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
//
// scaleshift (FP32)
// |
// scaleshift (FP32)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{addNode3}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
}
};
- TEST_P(Scaleshift_x3_ConvEltwiseRelu, CompareWithRefImpl) {
- test();
- };
+TEST_P(Scaleshift_x3_ConvEltwiseRelu, CompareWithRefImpl) {
+ test();
+};
INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Scaleshift_x3_ConvEltwiseRelu,
::testing::Combine(
class PoolingAfterConv : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// Scaleshift (FP32)
// |
// Convolution (BF16)
// STAGE1: construction of the GRAPH
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+ const int outChannels = 16;
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 16, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { outChannels, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(16 * 3 * 3 * 3);
+ weightValuesFP32.resize(outChannels * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(16 * 3 * 3 * 3);
+ weightValuesBF16.resize(outChannels * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{avgpoolNode}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class TopKInputsI32 : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// Power (FP32)
// |
// Convolution1 (BF16) Const (I32)
// STAGE1: construction of the GRAPH
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+ const int intermediateChannelsCount = 16;
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 16, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { intermediateChannelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(16 * 3 * 3 * 3);
+ weightValuesFP32.resize(intermediateChannelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(16 * 3 * 3 * 3);
+ weightValuesBF16.resize(intermediateChannelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2, goe1}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
- threshold = 0.14f; // max value in the latest tensor for FP32 network is 22.6
+ threshold = 0.5f; // max value in the latest tensor for FP32 network is 22.6
// STAGE2:
// filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// CPU-plugin instantiations of the shared core-threading behavior tests.
+#include <behavior/core_threading_tests.hpp>
+
+namespace {
+
+// Device/config pairs under test: bare CPU with performance counters enabled,
+// HETERO falling back to CPU, and MULTI with CPU as the only priority device.
+Params params[] = {
+ std::tuple<Device, Config> { "CPU", { { CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(YES) } } },
+ std::tuple<Device, Config> { "HETERO", { { "TARGET_FALLBACK", "CPU" } } },
+ std::tuple<Device, Config> { "MULTI", { { MULTI_CONFIG_KEY(DEVICE_PRIORITIES) , "CPU" } } }
+};
+
+} // namespace
+
+INSTANTIATE_TEST_CASE_P(CPU, CoreThreadingTests, testing::ValuesIn(params));
+
+// NOTE(review): the extra Combine values are presumably thread count (4) and
+// iteration count (50) — confirm against CoreThreadingTestsWithIterations.
+INSTANTIATE_TEST_CASE_P(CPU, CoreThreadingTestsWithIterations,
+ testing::Combine(testing::ValuesIn(params),
+ testing::Values(4),
+ testing::Values(50)));
const auto maxPool_ExplicitPad_CeilRounding_Params = ::testing::Combine(
::testing::Values(ngraph::helpers::PoolingTypes::MAX),
::testing::ValuesIn(kernels),
- // TODO: Non 1 strides fails in ngraph reference implementation with error "The end corner is out of bounds at axis 3" thrown in the test body.
- ::testing::Values(std::vector<size_t>({1, 1})),
+ ::testing::ValuesIn(strides),
::testing::ValuesIn(padBegins),
::testing::ValuesIn(padEnds),
::testing::Values(ngraph::op::RoundingType::CEIL),
::testing::Values(ngraph::helpers::PoolingTypes::AVG),
::testing::ValuesIn(kernels),
// TODO: Non 1 strides fails in ngraph reference implementation with error "The end corner is out of bounds at axis 3" thrown in the test body.
- ::testing::Values(std::vector<size_t>({1, 1})),
- // TODO: Non zero pads excluded because of accuracy mismatch
- ::testing::Values(std::vector<size_t>({0, 0})),
- ::testing::Values(std::vector<size_t>({0, 0})),
+ ::testing::ValuesIn(strides),
+ ::testing::ValuesIn(std::vector<std::vector<size_t>>({{0, 0}, {1, 1}, {0, 1}})),
+ ::testing::ValuesIn(std::vector<std::vector<size_t>>({{0, 0}, {1, 1}, {0, 1}})),
::testing::Values(ngraph::op::RoundingType::CEIL),
::testing::Values(ngraph::op::PadType::EXPLICIT),
::testing::Values(true, false)
::testing::Values(std::vector<size_t >({1, 3, 30, 30})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
PoolingLayerTest::getTestCaseName);
+
+std::vector<poolSpecificParams> psParams({poolSpecificParams(ngraph::helpers::PoolingTypes::AVG, {2, 2}, {2, 2}, {0, 0}, {0, 0},
+ ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false),
+ poolSpecificParams(ngraph::helpers::PoolingTypes::AVG, {7, 7}, {1, 1}, {0, 0}, {1, 1},
+ ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false)});
+
+INSTANTIATE_TEST_CASE_P(AvgPool_ExplicitPad_CeilRounding_corner, PoolingLayerTest,
+ ::testing::Combine(
+ ::testing::ValuesIn(psParams),
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(std::vector<size_t >({1, 1024, 6, 6})),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ PoolingLayerTest::getTestCaseName);
+
/* +========== Explicit Pad Floor Rounding ========== */
const auto avgPoolExplicitPadFloorRoundingParams = ::testing::Combine(
::testing::Values(ngraph::helpers::PoolingTypes::AVG),
::testing::ValuesIn(kernels),
::testing::ValuesIn(strides),
- // TODO: Non zero pads excluded because of accuracy mismatch
- ::testing::Values(std::vector<size_t>({0, 0})),
- ::testing::Values(std::vector<size_t>({0, 0})),
+ ::testing::ValuesIn(std::vector<std::vector<size_t>>({{0, 0}, {1, 1}})),
+ ::testing::ValuesIn(std::vector<std::vector<size_t>>({{0, 0}, {1, 1}})),
::testing::Values(ngraph::op::RoundingType::FLOOR),
::testing::Values(ngraph::op::PadType::EXPLICIT),
::testing::Values(true, false)
-} // namespace
\ No newline at end of file
+} // namespace
+
std::vector<std::string> disabledTestPatterns() {
return {
+ // TODO: Issue 26264
+ R"(.*(MaxPool|AvgPool).*S\(1\.2\).*Rounding=CEIL.*)"
};
}
\ No newline at end of file
const std::vector<std::vector<size_t >> dilations3d = {{1, 1, 1},
{1, 2, 1}};
-const auto conv3DParams_FP32 = ::testing::Combine(
+const auto conv3DParams = ::testing::Combine(
::testing::ValuesIn(kernels3d),
::testing::ValuesIn(strides3d),
::testing::ValuesIn(paddings3d),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
-const auto conv3DParams_FP16 = ::testing::Combine(
- ::testing::Values(std::vector<size_t >({3, 3, 3})),
- ::testing::ValuesIn(strides3d),
- ::testing::ValuesIn(paddings3d),
- ::testing::ValuesIn(paddings3d),
- ::testing::ValuesIn(dilations3d),
- ::testing::Values(5),
- ::testing::Values(ngraph::op::PadType::EXPLICIT)
-);
-
-INSTANTIATE_TEST_CASE_P(Convolution3D_FP32, ConvolutionLayerTest,
+INSTANTIATE_TEST_CASE_P(Convolution3D, ConvolutionLayerTest,
::testing::Combine(
- conv3DParams_FP32,
- ::testing::Values(InferenceEngine::Precision::FP32),
- ::testing::Values(std::vector<size_t >({1, 3, 10, 10, 10})),
- ::testing::Values(CommonTestUtils::DEVICE_GPU)),
- ConvolutionLayerTest::getTestCaseName);
-
-INSTANTIATE_TEST_CASE_P(Convolution3D_FP16, ConvolutionLayerTest,
- ::testing::Combine(
- conv3DParams_FP16,
- ::testing::Values(InferenceEngine::Precision::FP16),
+ conv3DParams,
+ ::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t >({1, 3, 10, 10, 10})),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
ConvolutionLayerTest::getTestCaseName);
-
} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// Validation tests for the VPU custom op ngraph::vpu::op::OutShapeOfReshape:
+// positive suites assert construction succeeds for valid type/shape
+// combinations, negative suites assert NodeValidationFailure otherwise.
+#include "vpu/ngraph/operations/out_shape_of_reshape.hpp"
+
+#include <common_test_utils/test_common.hpp>
+
+#include <ngraph/op/parameter.hpp>
+#include <ngraph/function.hpp>
+
+#include <details/ie_exception.hpp>
+
+#include <gtest/gtest.h>
+
+namespace {
+
+using TensorShape = ngraph::PartialShape;
+using TensorType = ngraph::element::Type;
+
+// (in-data-shape tensor shape, its type, out-shape-descriptor shape, its type)
+using TestParams = std::tuple<
+ TensorShape,
+ TensorType,
+ TensorShape,
+ TensorType>;
+
+// Fixture: builds the two Parameter nodes consumed by OutShapeOfReshape
+// from the test-parameter tuple.
+class OutShapeOfReshapeTests
+ : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<TestParams> {
+public:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& inDataShapeTensorShape = std::get<0>(parameters);
+ const auto& inTensorShapeTensorType = std::get<1>(parameters);
+ const auto& outShapeDescriptorTensorShape = std::get<2>(parameters);
+ const auto& outShapeDescriptorTensorType = std::get<3>(parameters);
+
+ m_inDataShapeParam = std::make_shared<ngraph::op::Parameter>(
+ inTensorShapeTensorType, inDataShapeTensorShape);
+ m_outShapeDescriptorParam = std::make_shared<ngraph::op::Parameter>(
+ outShapeDescriptorTensorType, outShapeDescriptorTensorShape);
+ }
+
+protected:
+ std::shared_ptr<ngraph::op::Parameter> m_inDataShapeParam;
+ std::shared_ptr<ngraph::op::Parameter> m_outShapeDescriptorParam;
+};
+
+// Valid static 1-D shapes for either input.
+std::vector<ngraph::PartialShape> tensorShapes {
+ TensorShape{1},
+ TensorShape{3},
+ TensorShape{4},
+};
+
+// Every ngraph element type; used below to derive the non-integral set.
+std::set<ngraph::element::Type> allNGraphTypes() {
+ return {
+ ngraph::element::dynamic,
+ ngraph::element::boolean,
+ ngraph::element::bf16,
+ ngraph::element::f16,
+ ngraph::element::f32,
+ ngraph::element::f64,
+ ngraph::element::i8,
+ ngraph::element::i16,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u1,
+ ngraph::element::u8,
+ ngraph::element::u16,
+ ngraph::element::u32,
+ ngraph::element::u64
+ };
+}
+
+// Integral types: the positive suite feeds these to both inputs.
+std::set<ngraph::element::Type> allNGraphIntegralNumberTypes() {
+ return {
+ ngraph::element::i8,
+ ngraph::element::i16,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u1,
+ ngraph::element::u8,
+ ngraph::element::u16,
+ ngraph::element::u32,
+ ngraph::element::u64
+ };
+}
+
+//
+// Positive tests
+//
+
+// NOTE(review): the third constructor argument 'true' is presumably the
+// special-zero flag of the underlying Reshape semantics — confirm against
+// the op's declaration.
+TEST_P(OutShapeOfReshapeTests, CanValidateAndInferTypes) {
+ std::shared_ptr<ngraph::vpu::op::OutShapeOfReshape> op;
+ ASSERT_NO_THROW(op = std::make_shared<ngraph::vpu::op::OutShapeOfReshape>(
+ m_inDataShapeParam, m_outShapeDescriptorParam, true));
+ ASSERT_NO_THROW(std::make_shared<ngraph::Function>(
+ ngraph::OutputVector{op->output(0)},
+ ngraph::ParameterVector{m_inDataShapeParam, m_outShapeDescriptorParam}));
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, OutShapeOfReshapeTests, testing::Combine(
+ testing::ValuesIn(tensorShapes),
+ testing::ValuesIn(allNGraphIntegralNumberTypes()),
+ testing::ValuesIn(tensorShapes),
+ testing::ValuesIn(allNGraphIntegralNumberTypes()))
+);
+
+//
+// Negative tests
+//
+
+// All types minus the integral set; std::set iterates in sorted order,
+// which is what std::set_difference requires.
+std::set<ngraph::element::Type> allNGraphNotIntegralTypes() {
+ auto notIntegralTypes = std::set<ngraph::element::Type>{};
+ const auto& allTypes = allNGraphTypes();
+ const auto& allIntegralTypes = allNGraphIntegralNumberTypes();
+ std::set_difference(allTypes.cbegin(), allTypes.cend(), allIntegralTypes.cbegin(), allIntegralTypes.cend(),
+ std::inserter(notIntegralTypes, notIntegralTypes.begin()));
+ return notIntegralTypes;
+}
+
+// Constructing the op with a non-integral input type must fail validation.
+using OutShapeOfReshapeTestsNegativeDataType = OutShapeOfReshapeTests;
+TEST_P(OutShapeOfReshapeTestsNegativeDataType, ThrowsOnInvalidDataType) {
+ std::shared_ptr<ngraph::vpu::op::OutShapeOfReshape> op;
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::OutShapeOfReshape>(
+ m_inDataShapeParam, m_outShapeDescriptorParam, true),
+ ngraph::NodeValidationFailure);
+}
+INSTANTIATE_TEST_CASE_P(InvalidInDataShapeTensorType, OutShapeOfReshapeTestsNegativeDataType,
+ testing::Combine(
+ testing::Values(TensorShape{4}),
+ testing::ValuesIn(allNGraphNotIntegralTypes()),
+ testing::Values(TensorShape{3}),
+ testing::Values(ngraph::element::i64))
+);
+
+INSTANTIATE_TEST_CASE_P(InvalidOutShapeDescriptorTensorType, OutShapeOfReshapeTestsNegativeDataType,
+ testing::Combine(
+ testing::Values(TensorShape{4}),
+ testing::Values(ngraph::element::i64),
+ testing::Values(TensorShape{3}),
+ testing::ValuesIn(allNGraphNotIntegralTypes()))
+);
+
+// Shapes the op must reject: rank-0, rank-2, and 1-D with a dynamic dimension.
+std::vector<ngraph::PartialShape> invalidTensorShapes {
+ TensorShape{},
+ TensorShape{4, 8},
+ TensorShape{ngraph::Dimension::dynamic()},
+};
+
+using OutShapeOfReshapeTestsNegativeDataShape = OutShapeOfReshapeTests;
+TEST_P(OutShapeOfReshapeTestsNegativeDataShape, ThrowsOnInvalidDataShape) {
+ std::shared_ptr<ngraph::vpu::op::OutShapeOfReshape> op;
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::OutShapeOfReshape>(
+ m_inDataShapeParam, m_outShapeDescriptorParam, true),
+ ngraph::NodeValidationFailure);
+}
+
+INSTANTIATE_TEST_CASE_P(InvalidInDataShapeTensorShape, OutShapeOfReshapeTestsNegativeDataShape,
+ testing::Combine(
+ testing::ValuesIn(invalidTensorShapes),
+ testing::Values(ngraph::element::i64),
+ testing::ValuesIn(tensorShapes),
+ testing::Values(ngraph::element::i64))
+);
+
+INSTANTIATE_TEST_CASE_P(InvalidOutShapeDescriptorTensorShape, OutShapeOfReshapeTestsNegativeDataShape,
+ testing::Combine(
+ testing::ValuesIn(tensorShapes),
+ testing::Values(ngraph::element::i64),
+ testing::ValuesIn(invalidTensorShapes),
+ testing::Values(ngraph::element::i64))
+);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// Validation tests for the VPU custom op ngraph::vpu::op::StaticShapeBroadcast
+// in NUMPY and EXPLICIT modes, plus negative cases for wrong input counts,
+// unsupported mode, and a non-evaluable target-shape input.
+#include "vpu/ngraph/operations/static_shape_broadcast.hpp"
+
+#include <common_test_utils/test_common.hpp>
+
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/function.hpp>
+
+#include <details/ie_exception.hpp>
+
+#include <gtest/gtest.h>
+
+namespace {
+
+using TensorType = ngraph::element::Type;
+using TensorShape = ngraph::PartialShape;
+using AxesMapping = std::vector<size_t>;
+
+// Source/target shape pair for NUMPY (implicit) broadcasting.
+struct BroadcastNumpyShapes {
+ TensorShape srcShape;
+ TensorShape targetShape;
+};
+
+// Source/target shape pair plus the axes mapping for EXPLICIT broadcasting.
+struct BroadcastExplicitShapes {
+ TensorShape srcShape;
+ TensorShape targetShape;
+ AxesMapping axesMapping;
+};
+
+using BroadcastNumpyTestParams = std::tuple<TensorType, BroadcastNumpyShapes>;
+using BroadcastExplicitTestParams = std::tuple<TensorType, BroadcastExplicitShapes>;
+
+// Fixture: a Parameter for the data tensor and another Parameter whose shape
+// is the broadcast target (its ShapeOf is used as the target-shape input).
+class StaticShapeBroadcastNumpyTests
+ : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<BroadcastNumpyTestParams> {
+public:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& tensorType = std::get<0>(parameters);
+ const auto& tensorShape = std::get<1>(parameters).srcShape;
+ const auto& targetShape = std::get<1>(parameters).targetShape;
+
+ m_tensor = std::make_shared<ngraph::opset3::Parameter>(tensorType, tensorShape);
+ m_tensorWithTargetShape = std::make_shared<ngraph::opset3::Parameter>(tensorType, targetShape);
+ }
+protected:
+ std::shared_ptr<ngraph::opset3::Parameter> m_tensor;
+ std::shared_ptr<ngraph::opset3::Parameter> m_tensorWithTargetShape;
+};
+
+// Same as the numpy fixture, plus a u64 Constant holding the axes mapping.
+class StaticShapeBroadcastExplicitTests
+ : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<BroadcastExplicitTestParams> {
+public:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& tensorType = std::get<0>(parameters);
+ const auto& tensorShape = std::get<1>(parameters).srcShape;
+ const auto& targetShape = std::get<1>(parameters).targetShape;
+ const auto& axesMapping = std::get<1>(parameters).axesMapping;
+
+ m_tensor = std::make_shared<ngraph::opset3::Parameter>(tensorType, tensorShape);
+ m_tensorWithTargetShape = std::make_shared<ngraph::opset3::Parameter>(tensorType, targetShape);
+ m_axesMapping = std::make_shared<ngraph::opset3::Constant>(
+ ngraph::element::u64, ngraph::Shape{axesMapping.size()}, axesMapping);
+ }
+protected:
+ std::shared_ptr<ngraph::opset3::Parameter> m_tensor;
+ std::shared_ptr<ngraph::opset3::Parameter> m_tensorWithTargetShape;
+ std::shared_ptr<ngraph::opset3::Constant> m_axesMapping;
+};
+
+std::vector<BroadcastNumpyShapes> testNumpyStaticShapes {
+ BroadcastNumpyShapes{TensorShape{1, 100}, TensorShape{4, 100}},
+ BroadcastNumpyShapes{TensorShape{1, 100}, TensorShape{2, 4, 100}},
+ BroadcastNumpyShapes{TensorShape{16, 1, 1}, TensorShape{2, 16, 50, 50}},
+};
+
+std::vector<BroadcastExplicitShapes> testExplicitStaticShapes {
+ BroadcastExplicitShapes{TensorShape{16}, TensorShape{1, 16, 50, 50}, AxesMapping{1}},
+ BroadcastExplicitShapes{TensorShape{50, 50}, TensorShape{1, 50, 50, 16}, AxesMapping{1, 2}},
+};
+
+std::vector<ngraph::element::Type> testNGraphNumericTypes {
+ ngraph::element::dynamic,
+ ngraph::element::bf16,
+ ngraph::element::f16,
+ ngraph::element::f32,
+ ngraph::element::f64,
+ ngraph::element::i8,
+ ngraph::element::i16,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u1,
+ ngraph::element::u8,
+ ngraph::element::u16,
+ ngraph::element::u32,
+ ngraph::element::u64,
+};
+
+//
+// Positive tests
+//
+
+// NUMPY mode: two inputs (data, target shape); output shape must equal the
+// target parameter's shape after validation.
+TEST_P(StaticShapeBroadcastNumpyTests, CanValidateAndInferTypes) {
+ const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(m_tensorWithTargetShape);
+ std::shared_ptr<ngraph::vpu::op::StaticShapeBroadcast> op;
+ ASSERT_NO_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ m_tensor, shapeOf));
+ ASSERT_NO_THROW(std::make_shared<ngraph::Function>(
+ ngraph::OutputVector{op->output(0)},
+ ngraph::ParameterVector{m_tensor, m_tensorWithTargetShape}));
+ ASSERT_EQ(m_tensorWithTargetShape->get_shape(), op->output(0).get_shape());
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeBroadcastNumpyTests, testing::Combine(
+ testing::ValuesIn(testNGraphNumericTypes),
+ testing::ValuesIn(testNumpyStaticShapes))
+);
+
+// EXPLICIT mode: three inputs (data, target shape, axes mapping).
+TEST_P(StaticShapeBroadcastExplicitTests, CanValidateAndInferTypes) {
+ const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(m_tensorWithTargetShape);
+ std::shared_ptr<ngraph::vpu::op::StaticShapeBroadcast> op;
+ ASSERT_NO_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ m_tensor, shapeOf, m_axesMapping));
+ ASSERT_NO_THROW(std::make_shared<ngraph::Function>(
+ ngraph::OutputVector{op->output(0)},
+ ngraph::ParameterVector{m_tensor, m_tensorWithTargetShape}));
+ ASSERT_EQ(m_tensorWithTargetShape->get_shape(), op->output(0).get_shape());
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeBroadcastExplicitTests, testing::Combine(
+ testing::ValuesIn(testNGraphNumericTypes),
+ testing::ValuesIn(testExplicitStaticShapes))
+);
+
+//
+// Negative tests
+//
+
+// NUMPY mode with a third (axes mapping) input must fail validation.
+using StaticShapeBroadcastNumpyTestsNegativeNumInputs = StaticShapeBroadcastNumpyTests;
+TEST_P(StaticShapeBroadcastNumpyTestsNegativeNumInputs, ThrowsOnInvalidNumInputs) {
+ const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(m_tensorWithTargetShape);
+ const auto axesMapping = std::make_shared<ngraph::opset3::Constant>(
+ ngraph::element::u64, ngraph::Shape{1}, 0);
+ std::shared_ptr<ngraph::vpu::op::StaticShapeBroadcast> op;
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ m_tensor, shapeOf, axesMapping, ngraph::op::BroadcastType::NUMPY),
+ ngraph::NodeValidationFailure);
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeBroadcastNumpyTestsNegativeNumInputs, testing::Combine(
+ testing::Values(ngraph::element::f16),
+ testing::Values(testNumpyStaticShapes[0]))
+);
+
+// EXPLICIT mode without the axes mapping input must fail validation.
+using StaticShapeBroadcastExplicitTestsNegativeNumInputs = StaticShapeBroadcastExplicitTests;
+TEST_P(StaticShapeBroadcastExplicitTestsNegativeNumInputs, ThrowsOnInvalidNumInputs) {
+ const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(m_tensorWithTargetShape);
+ std::shared_ptr<ngraph::vpu::op::StaticShapeBroadcast> op;
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ m_tensor, shapeOf, ngraph::op::BroadcastType::EXPLICIT),
+ ngraph::NodeValidationFailure);
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeBroadcastExplicitTestsNegativeNumInputs, testing::Combine(
+ testing::Values(ngraph::element::f16),
+ testing::Values(testExplicitStaticShapes[0]))
+);
+
+// BIDIRECTIONAL mode is not supported by this op.
+using StaticShapeBroadcastTestsNegativeMode = StaticShapeBroadcastNumpyTests;
+TEST_P(StaticShapeBroadcastTestsNegativeMode, ThrowsOnInvalidMode) {
+ const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(m_tensorWithTargetShape);
+ std::shared_ptr<ngraph::vpu::op::StaticShapeBroadcast> op;
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ m_tensor, shapeOf, ngraph::op::BroadcastType::BIDIRECTIONAL),
+ ngraph::NodeValidationFailure);
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeBroadcastTestsNegativeMode, testing::Combine(
+ testing::Values(ngraph::element::f16),
+ testing::Values(testNumpyStaticShapes[0]))
+);
+
+// Target shape passed as a Parameter (not evaluable at validation time)
+// must fail validation.
+// NOTE(review): test body name 'ThrowsOnInvalidMode' appears copy-pasted from
+// the previous suite — presumably should read 'ThrowsOnNonEvaluableTargetShape'.
+using StaticShapeBroadcastTestsNegativeEvaluate = StaticShapeBroadcastNumpyTests;
+TEST_P(StaticShapeBroadcastTestsNegativeEvaluate, ThrowsOnInvalidMode) {
+ const auto targetShape = std::make_shared<ngraph::opset3::Parameter>(
+ ngraph::element::u64, ngraph::Shape{4});
+ std::shared_ptr<ngraph::vpu::op::StaticShapeBroadcast> op;
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ m_tensor, targetShape), ngraph::NodeValidationFailure);
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeBroadcastTestsNegativeEvaluate, testing::Combine(
+ testing::Values(ngraph::element::f16),
+ testing::Values(testNumpyStaticShapes[0]))
+);
+
+} // namespace
using TensorType = ngraph::element::Type;
using TensorShape = ngraph::PartialShape;
+typedef std::tuple<
+ TensorType, // input type
+ TensorShape, // input shape
+ TensorType // output type
+> staticShapeNonZeroTestParams;
+
class StaticShapeNonZeroTests
: public CommonTestUtils::TestsCommon,
- public testing::WithParamInterface<std::tuple<TensorType, TensorShape>> {
+ public testing::WithParamInterface<staticShapeNonZeroTestParams> {
public:
void SetUp() override {
const auto& parameters = GetParam();
const auto& tensorType = std::get<0>(parameters);
const auto& tensorShape = std::get<1>(parameters);
+ m_outputType = std::get<2>(parameters);
m_param = std::make_shared<ngraph::opset3::Parameter>(tensorType, tensorShape);
}
protected:
std::shared_ptr<ngraph::opset3::Parameter> m_param;
+ ngraph::element::Type m_outputType;
};
std::vector<ngraph::PartialShape> testStaticShapes {
ngraph::element::u64,
};
+std::vector<ngraph::element::Type> outputTypes {
+ ngraph::element::i32,
+ ngraph::element::i64,
+};
+
+
//
// Positive tests
//
TEST_P(StaticShapeNonZeroTests, CanValidateAndInferTypes) {
std::shared_ptr<ngraph::vpu::op::StaticShapeNonZero> op;
- ASSERT_NO_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(m_param));
+ ASSERT_NO_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(m_param, m_outputType));
ASSERT_NO_THROW(std::make_shared<ngraph::Function>(
ngraph::OutputVector{op->output(0), op->output(1)},
ngraph::ParameterVector{m_param}));
INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeNonZeroTests, testing::Combine(
testing::ValuesIn(testNGraphNumericTypes),
- testing::ValuesIn(testStaticShapes))
+ testing::ValuesIn(testStaticShapes),
+ testing::ValuesIn(outputTypes))
);
//
// Negative tests
//
-using StaticShapeNonZeroTestsNegativeDataType = StaticShapeNonZeroTests;
-TEST_P(StaticShapeNonZeroTestsNegativeDataType, ThrowsOnInvalidDataType) {
+using StaticShapeNonZeroTestsNegativeInputDataType = StaticShapeNonZeroTests;
+TEST_P(StaticShapeNonZeroTestsNegativeInputDataType, ThrowsOnInvalidInputType) {
std::shared_ptr<ngraph::vpu::op::StaticShapeNonZero> op;
- ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(m_param),
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(m_param, m_outputType),
ngraph::NodeValidationFailure);
}
-INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeNonZeroTestsNegativeDataType, testing::Combine(
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeNonZeroTestsNegativeInputDataType, testing::Combine(
testing::Values(ngraph::element::boolean),
- testing::ValuesIn(testStaticShapes))
+ testing::ValuesIn(testStaticShapes),
+ testing::ValuesIn(outputTypes))
);
using StaticShapeNonZeroTestsNegativeDataShape = StaticShapeNonZeroTests;
TEST_P(StaticShapeNonZeroTestsNegativeDataShape, ThrowsOnInvalidDataShape) {
std::shared_ptr<ngraph::vpu::op::StaticShapeNonZero> op;
- ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(m_param),
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(m_param, m_outputType),
ngraph::NodeValidationFailure);
}
INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeNonZeroTestsNegativeDataShape, testing::Combine(
testing::ValuesIn(testNGraphNumericTypes),
- testing::ValuesIn(testDynamicShapes))
+ testing::ValuesIn(testDynamicShapes),
+ testing::ValuesIn(outputTypes))
+);
+
+using StaticShapeNonZeroTestsNegativeOutputDataType = StaticShapeNonZeroTests;
+TEST_P(StaticShapeNonZeroTestsNegativeOutputDataType, ThrowsOnInvalidOutputType) {
+ std::shared_ptr<ngraph::vpu::op::StaticShapeNonZero> op;
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(m_param, m_outputType),
+ ngraph::NodeValidationFailure);
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeNonZeroTestsNegativeOutputDataType, testing::Combine(
+ testing::ValuesIn(testNGraphNumericTypes),
+ testing::ValuesIn(testStaticShapes),
+ testing::Values(ngraph::element::boolean))
);
} // namespace
eltwise->set_output_type(0, eltwise->get_input_element_type(0), ngraph::PartialShape::dynamic(eltwise->get_output_partial_shape(0).rank()));
const auto transformations = vpu::Transformations{{eltwiseType, vpu::dynamicToStaticShapeBinaryEltwise}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
eltwise->set_output_type(0, eltwise->get_input_element_type(0), ngraph::PartialShape::dynamic(eltwise->get_output_partial_shape(0).rank()));
const auto transformations = vpu::Transformations{{eltwiseType, vpu::dynamicToStaticShapeBinaryEltwise}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// Checks that the dynamicToStaticShapeBroadcast transformation rewrites an
+// opset3::Broadcast (Explicit mode) into StaticShapeBroadcast followed by a
+// DynamicShapeResolver, by structurally comparing the transformed function
+// against a hand-built reference function.
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_broadcast.hpp"
+#include "vpu/ngraph/transformations/dynamic_to_static_shape.hpp"
+#include "vpu/ngraph/operations/static_shape_broadcast.hpp"
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+
+#include <ngraph_functions/utils/ngraph_helpers.hpp>
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset3.hpp>
+
+#include <common_test_utils/test_common.hpp>
+#include <gtest/gtest.h>
+
+#include <string>
+#include <memory>
+#include <map>
+#include <vector>
+
+namespace {
+
+using TensorType = ngraph::element::Type;
+using TensorShape = ngraph::PartialShape;
+using AxesMapping = std::vector<size_t>;
+
+// Source/target shape pair plus axes mapping for explicit broadcasting.
+struct BroadcastExplicitShapes {
+ TensorShape srcShape;
+ TensorShape targetShape;
+ AxesMapping axesMapping;
+};
+using BroadcastExplicitTestParams = std::tuple<TensorType, BroadcastExplicitShapes>;
+
+class DynamicToStaticShapeBroadcastTests
+ : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<BroadcastExplicitTestParams> {
+public:
+ // The comparison itself runs in SetUp(); the TEST_P body below is empty.
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& tensorType = std::get<0>(parameters);
+ const auto& tensorShape = std::get<1>(parameters).srcShape;
+ const auto& targetShape = std::get<1>(parameters).targetShape;
+ const auto& axesMapping = std::get<1>(parameters).axesMapping;
+
+ ngraph::helpers::CompareFunctions(
+ *transform(tensorType, tensorShape, targetShape, axesMapping),
+ *reference(tensorType, tensorShape, targetShape, axesMapping));
+ }
+
+protected:
+ // Builds a function with an explicit-mode opset3::Broadcast and runs the
+ // DynamicToStaticShape pass over it.
+ std::shared_ptr<const ngraph::Function> transform(
+ const TensorType& tensorType,
+ const TensorShape& tensorShape,
+ const TensorShape& targetShape,
+ const AxesMapping& axesMapping) const {
+ const auto tensorParam = std::make_shared<ngraph::opset3::Parameter>(
+ tensorType, tensorShape);
+ const auto tensorWithTargetShapeParam = std::make_shared<ngraph::opset3::Parameter>(
+ tensorType, targetShape);
+
+ const auto shapeOfNode = std::make_shared<ngraph::opset3::ShapeOf>(tensorWithTargetShapeParam);
+ // Keep ShapeOf from being constant-folded so the Broadcast retains a
+ // non-constant shape input for the transformation to handle.
+ shapeOfNode->set_is_foldable(false);
+
+ const auto axesMappingConstant = std::make_shared<ngraph::opset3::Constant>(
+ ngraph::element::u64, ngraph::Shape{axesMapping.size()}, axesMapping);
+
+ const auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(
+ tensorParam, shapeOfNode, axesMappingConstant);
+
+ auto function = std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{broadcast},
+ ngraph::ParameterVector{tensorParam, tensorWithTargetShapeParam},
+ "Actual");
+
+ // We need to set broadcast output shape to make its rank static.
+ // In opset3::Broadcast implementation with Explicit mode output shape gets
+ // static rank only in cases when the second input is Concat
+ std::vector<ngraph::Dimension> broadcastOutShape(
+ shapeOfNode->get_output_shape(0)[0], ngraph::Dimension::dynamic());
+ broadcast->set_output_type(0, tensorParam->get_output_element_type(0),
+ ngraph::PartialShape(broadcastOutShape));
+ function->get_result()->set_output_type(0, tensorParam->get_output_element_type(0),
+ targetShape);
+
+ const auto transformations = vpu::Transformations{{
+ ngraph::opset3::Broadcast::type_info, vpu::dynamicToStaticShapeBroadcast}};
+ vpu::DynamicToStaticShape(transformations).transform(function);
+ return function;
+ }
+
+ // Builds the expected post-transformation graph:
+ // StaticShapeBroadcast -> DynamicShapeResolver.
+ std::shared_ptr<const ngraph::Function> reference(
+ const TensorType& tensorType,
+ const TensorShape& tensorShape,
+ const TensorShape& targetShape,
+ const AxesMapping& axesMapping) const {
+ const auto tensorParam = std::make_shared<ngraph::opset3::Parameter>(
+ tensorType, tensorShape);
+ const auto tensorWithTargetShapeParam = std::make_shared<ngraph::opset3::Parameter>(
+ tensorType, targetShape);
+ const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(tensorWithTargetShapeParam);
+
+ const auto axesMappingConstant = std::make_shared<ngraph::opset3::Constant>(
+ ngraph::element::u64, ngraph::Shape{axesMapping.size()}, axesMapping);
+
+ const auto staticShapeBroadcast = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ tensorParam, shapeOf, axesMappingConstant);
+
+ const auto dsrOut = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ staticShapeBroadcast, shapeOf);
+ return std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{dsrOut},
+ ngraph::ParameterVector{tensorParam, tensorWithTargetShapeParam},
+ "Expected");
+ }
+};
+
+// Intentionally empty: SetUp() already performed the function comparison.
+TEST_P(DynamicToStaticShapeBroadcastTests, compareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeBroadcastTests, testing::Combine(
+ testing::Values(
+ ngraph::element::f16,
+ ngraph::element::f32,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u8),
+ testing::Values(
+ BroadcastExplicitShapes{TensorShape{16}, TensorShape{1, 16, 50, 50}, AxesMapping{1}},
+ BroadcastExplicitShapes{TensorShape{50, 50}, TensorShape{1, 50, 50, 16}, AxesMapping{1, 2}})
+
+));
+
+} // namespace
node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{ngraph::opset3::Clamp::type_info, vpu::dynamicToStaticUnaryElementwise}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <common_test_utils/test_common.hpp>
+
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape.hpp>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape_concat.hpp>
+#include <vpu/utils/error.hpp>
+
+#include <ngraph/op/parameter.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/shape.hpp>
+#include <ngraph/type/element_type.hpp>
+#include <ngraph_functions/utils/ngraph_helpers.hpp>
+
+#include <numeric>
+#include <queue>
+#include <random>
+
+namespace {
+
+using DataType = ngraph::element::Type;
+using DataShape = ngraph::Shape;
+using DataShapes = std::vector<DataShape>;
+
+struct ConcatParam {
+ DataShapes dataShapes;
+ int axis;
+};
+using ConcatTestParam = std::tuple<DataType, ConcatParam>;
+
+// Tests the DynamicToStaticShape pass for ngraph::opset3::Concat: the pass must
+// turn a Concat over dynamic (DSR-fed) inputs into a static Concat followed by a
+// DynamicShapeResolver whose shape input is recomputed from the input shapes.
+class DynamicToStaticShapeConcatTests
+ : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<ConcatTestParam> {
+public:
+ // Builds the transformed function and a hand-crafted reference and compares
+ // them structurally; the TEST_P body itself is empty.
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& dataType = std::get<0>(parameters);
+ const auto& concatParam = std::get<1>(parameters);
+ const auto& dataShapes = concatParam.dataShapes;
+ const auto& axis = concatParam.axis;
+
+ ngraph::helpers::CompareFunctions(
+ *transform(dataType, dataShapes, axis),
+ *reference(dataType, dataShapes, axis));
+ }
+
+protected:
+ // Creates a data Parameter plus an i64 shape Parameter and wires them into a
+ // DynamicShapeResolver; both Parameters are appended to `params`
+ // (data first, then its shape — callers rely on params.back() being the shape).
+ std::shared_ptr<ngraph::Node> createDSRWithParams(
+ const DataShape& dataShape,
+ const ngraph::element::Type& dataType,
+ ngraph::ParameterVector& params) const {
+ const auto param = std::make_shared<ngraph::opset3::Parameter>(
+ dataType, dataShape);
+ const auto shape = std::make_shared<ngraph::opset3::Parameter>(
+ ngraph::element::i64, ngraph::Shape{dataShape.size()});
+ params.push_back(param);
+ params.push_back(shape);
+ return std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(param, shape);
+ }
+
+ // "Actual" function: Concat over DSR inputs with the DynamicToStaticShape
+ // pass applied to it.
+ std::shared_ptr<const ngraph::Function> transform(
+ const ngraph::element::Type& dataType,
+ const DataShapes& dataShapes,
+ const int axis) const {
+ ngraph::NodeVector dsrVector;
+ ngraph::ParameterVector params;
+ for (const auto& dataShape : dataShapes) {
+ dsrVector.push_back(createDSRWithParams(dataShape, dataType, params));
+ }
+
+ const auto concat = std::make_shared<ngraph::opset3::Concat>(dsrVector, axis);
+ const auto function = std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{concat}, params, "Actual");
+ // Force the Concat output to be rank-only dynamic so the pass has dynamism
+ // to resolve; done after the Function is constructed, as in the sibling tests.
+ concat->set_output_type(0, dsrVector[0]->get_input_element_type(0),
+ ngraph::PartialShape::dynamic(concat->get_output_partial_shape(0).rank()));
+
+ const auto transformations = vpu::Transformations{
+ {ngraph::opset3::Concat::type_info, vpu::dynamicToStaticShapeConcat}};
+ vpu::DynamicToStaticShape(transformations).transform(function);
+ return function;
+ }
+
+ // "Expected" function: the output shape is reconstructed by summing all input
+ // shape parameters elementwise, then dividing every dimension except the
+ // concatenation axis by the input count (those dims are equal across inputs,
+ // so sum/count restores them; the axis dimension keeps the sum).
+ std::shared_ptr<const ngraph::Function> reference(
+ const ngraph::element::Type& dataType,
+ const DataShapes& dataShapes,
+ const int axis) const {
+ ngraph::NodeVector dsrVector;
+ ngraph::ParameterVector params;
+
+ dsrVector.push_back(createDSRWithParams(dataShapes.front(), dataType, params));
+
+ // params.back() is the shape Parameter of the DSR just created.
+ auto accumulatedShape = params.back()->output(0);
+ for (size_t inputIdx = 1; inputIdx < dataShapes.size(); ++inputIdx) {
+ dsrVector.push_back(createDSRWithParams(
+ dataShapes.at(inputIdx), dataType, params));
+ const auto shapeAccumulatorOp = std::make_shared<ngraph::opset3::Add>(
+ accumulatedShape, params.back());
+ accumulatedShape = shapeAccumulatorOp->output(0);
+ }
+
+ const size_t rank = dataShapes.front().size();
+ std::vector<int64_t> dividerValues(rank, dataShapes.size());
+ // Normalize a negative axis before indexing into the divider vector.
+ dividerValues[axis < 0 ? axis + rank : axis] = 1;
+ const auto divider = std::make_shared<ngraph::opset3::Constant>(
+ ngraph::element::i64, ngraph::Shape{rank}, dividerValues);
+ const auto divide = std::make_shared<ngraph::opset3::Divide>(accumulatedShape, divider);
+
+ const auto concat = std::make_shared<ngraph::opset3::Concat>(dsrVector, axis);
+ const auto outDsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(concat, divide);
+ return std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{outDsr}, params, "Expected");
+ }
+};
+
+// Body intentionally empty: all the checking happens in SetUp().
+TEST_P(DynamicToStaticShapeConcatTests, CompareFunctions) {
+}
+
+// Element types exercised for the concatenated data.
+std::vector<ngraph::element::Type> dataTypes = {
+ ngraph::element::f16,
+ ngraph::element::f32,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u8,
+};
+
+// Shape/axis combinations; the last case uses a negative axis (-1 == last dim).
+std::vector<ConcatParam> concatParams = {
+ {DataShapes{DataShape{128}, DataShape{256}, DataShape{512}, DataShape{1024}}, 0},
+ {DataShapes{DataShape{1, 1000}, DataShape{2, 1000}, DataShape{4, 1000}, DataShape{8, 1000}}, 0},
+ {DataShapes{DataShape{128, 100}, DataShape{128, 200}, DataShape{128, 400}, DataShape{128, 800}}, 1},
+ {DataShapes{DataShape{3, 64, 128}, DataShape{4, 64, 128}, DataShape{5, 64, 128}}, 0},
+ {DataShapes{DataShape{3, 64, 128}, DataShape{3, 64, 256}, DataShape{3, 64, 512}}, 2},
+ {DataShapes{DataShape{3, 64, 128}, DataShape{3, 64, 256}, DataShape{3, 64, 512}}, -1},
+};
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeConcatTests, testing::Combine(
+ testing::ValuesIn(dataTypes),
+ testing::ValuesIn(concatParams)));
+
+} // namespace
convert->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{ngraph::opset3::Convert::type_info, vpu::dynamicToStaticUnaryElementwise}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <common_test_utils/test_common.hpp>
+#include <ngraph/shape.hpp>
+#include <ngraph/type/element_type.hpp>
+#include <ngraph/op/parameter.hpp>
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+#include <numeric>
+#include <random>
+#include <ngraph/opsets/opset3.hpp>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape_gather.hpp>
+#include <queue>
+#include <ngraph_functions/utils/ngraph_helpers.hpp>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape.hpp>
+#include <vpu/utils/error.hpp>
+
+namespace {
+
+using DataType = ngraph::element::Type_t;
+using DataDims = ngraph::Shape;
+
+// One Gather scenario. first_split_point/second_split_point bound the data dims
+// that the indices shape replaces when the reference output shape is assembled:
+// data dims [0, first_split_point), then the index shape, then data dims
+// [second_split_point, rank). Presumably first == normalized axis and
+// second == axis + 1 — TODO confirm against dynamicToStaticShapeGather.
+struct GatherTestCase {
+ ngraph::Shape data_shape, index_shape;
+ int64_t axis, first_split_point, second_split_point;
+};
+
+// Shared parameter grid: data element type x index element type x scenario.
+// Negative axes (-1, -4) exercise axis normalization.
+const auto combinations = testing::Combine(
+ testing::Values(
+ ngraph::element::f16,
+ ngraph::element::f32,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u8),
+ testing::Values(
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u8),
+ testing::Values(
+ GatherTestCase{{6}, {15, 4, 20, 28}, 0, 0, 0},
+ GatherTestCase{{6, 12, 10, 24}, {6}, 0, 0, 1},
+ GatherTestCase{{6, 12}, {15, 4, 20, 28}, 1, 1, 2},
+ GatherTestCase{{6, 12, 10, 24}, {15, 4, 20, 28}, 3, 3, 4},
+ GatherTestCase{{6, 12, 10, 24}, {15, 4, 20, 28}, -1, 3, 4},
+ GatherTestCase{{6, 12, 10, 24}, {15, 4, 20, 28}, -4, 0, 1}));
+
+
+// Gather with a DSR on the *data* input only; the indices are static.
+class DynamicToStaticShapeGatherDataDSR : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<std::tuple<DataType, DataType, GatherTestCase>> {
+public:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& data_type = std::get<0>(parameters);
+ const auto& idx_type = std::get<1>(parameters);
+ const auto& gather_setup = std::get<2>(parameters);
+
+ ngraph::helpers::CompareFunctions(*transform(data_type, idx_type, gather_setup),
+ *reference(data_type, idx_type, gather_setup));
+ }
+
+protected:
+ // "Actual": Gather(DSR(data), indices, axis) run through the pass.
+ std::shared_ptr<const ngraph::Function> transform(
+ const ngraph::element::Type_t& data_type,
+ const ngraph::element::Type_t& idx_type,
+ const GatherTestCase& gather_setup) const {
+ const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+ const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+ const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+ const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.data_shape.size()});
+
+ const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, dims);
+ const auto node = std::make_shared<ngraph::opset3::Gather>(dsr, indices, axis);
+
+ auto outputShape = node->get_output_partial_shape(0);
+ const auto function = std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{node},
+ ngraph::ParameterVector{data, dims, indices},
+ "Actual");
+ // Gather output rank = data rank + indices rank - 1 (axis dim replaced by index shape).
+ node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(
+ gather_setup.data_shape.size() + gather_setup.index_shape.size() - 1));
+
+ const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeGather}};
+ vpu::DynamicToStaticShape(transformations).transform(function);
+ return function;
+ }
+
+ // "Expected": output shape is stitched from dynamic data dims before the
+ // split point, the constant indices shape, and dynamic data dims after it.
+ std::shared_ptr<const ngraph::Function> reference(
+ const ngraph::element::Type_t& data_type,
+ const ngraph::element::Type_t& idx_type,
+ const GatherTestCase& gather_setup) const {
+ const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+ const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+ const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+ const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.data_shape.size()});
+
+ const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, dims);
+ const auto node = std::make_shared<ngraph::opset3::Gather>(dsr, indices, axis);
+
+ // Indices are static here, so their shape becomes a constant sub-shape.
+ const auto indices_shape = ngraph::opset3::Constant::create(dims->get_element_type(), {gather_setup.index_shape.size()}, gather_setup.index_shape);
+ ngraph::OutputVector output_dims;
+ if (gather_setup.first_split_point) {
+ std::vector<int64_t> idxs(gather_setup.first_split_point);
+ std::iota(idxs.begin(), idxs.end(), 0);
+ output_dims.push_back(
+ std::make_shared<ngraph::opset3::Gather>(
+ dims,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+ }
+ if (!gather_setup.index_shape.empty())
+ output_dims.push_back(indices_shape);
+ if (gather_setup.first_split_point + 1 < gather_setup.data_shape.size()) {
+ std::vector<int64_t> idxs(gather_setup.data_shape.size() - gather_setup.second_split_point);
+ std::iota(idxs.begin(), idxs.end(), gather_setup.second_split_point);
+ output_dims.push_back(
+ std::make_shared<ngraph::opset3::Gather>(
+ dims,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+ }
+ const auto output_shape = std::make_shared<ngraph::opset3::Concat>(output_dims, 0);
+ const auto dsr1 = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(node, output_shape);
+ return std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{dsr1},
+ ngraph::ParameterVector{data, dims, indices},
+ "Expected");
+ }
+};
+
+TEST_P(DynamicToStaticShapeGatherDataDSR, CompareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeGatherDataDSR, combinations);
+
+// Gather with a DSR on the *indices* input only; the data is static.
+class DynamicToStaticShapeGatherIdxDSR : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<std::tuple<DataType, DataType, GatherTestCase>> {
+public:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& data_type = std::get<0>(parameters);
+ const auto& idx_type = std::get<1>(parameters);
+ const auto& gather_setup = std::get<2>(parameters);
+
+ ngraph::helpers::CompareFunctions(*transform(data_type, idx_type, gather_setup),
+ *reference(data_type, idx_type, gather_setup));
+ }
+
+protected:
+ // "Actual": Gather(data, DSR(indices), axis) run through the pass.
+ std::shared_ptr<const ngraph::Function> transform(
+ const ngraph::element::Type_t& data_type,
+ const ngraph::element::Type_t& idx_type,
+ const GatherTestCase& gather_setup) const {
+ const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+ const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+ const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+ const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.index_shape.size()});
+
+ const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(indices, dims);
+ const auto node = std::make_shared<ngraph::opset3::Gather>(data, dsr, axis);
+
+ auto outputShape = node->get_output_partial_shape(0);
+ const auto function = std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{node},
+ ngraph::ParameterVector{data, dims, indices},
+ "Actual");
+ // NOTE(review): `dsr` here wraps the *indices*, so this sets the Gather
+ // output element type to idx_type. The DataDSR and combined tests use the
+ // data element type instead (a Gather result has the data type) — this
+ // looks like a copy-paste slip; verify it is intentional.
+ node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(
+ gather_setup.data_shape.size() + gather_setup.index_shape.size() - 1));
+
+ const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeGather}};
+ vpu::DynamicToStaticShape(transformations).transform(function);
+ return function;
+ }
+
+ // "Expected": output shape is stitched from constant data dims before the
+ // split point, the dynamic indices dims parameter, and constant data dims after it.
+ std::shared_ptr<const ngraph::Function> reference(
+ const ngraph::element::Type_t& data_type,
+ const ngraph::element::Type_t& idx_type,
+ const GatherTestCase& gather_setup) const {
+ const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+ const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+ const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+ const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.index_shape.size()});
+
+ const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(indices, dims);
+ const auto node = std::make_shared<ngraph::opset3::Gather>(data, dsr, axis);
+
+ // Data is static here, so its shape becomes a constant sub-shape.
+ const auto data_shape = ngraph::opset3::Constant::create(dims->get_element_type(), {gather_setup.data_shape.size()}, gather_setup.data_shape);
+
+ ngraph::OutputVector output_dims;
+ if (gather_setup.first_split_point) {
+ std::vector<int64_t> idxs(gather_setup.first_split_point);
+ std::iota(idxs.begin(), idxs.end(), 0);
+ output_dims.push_back(
+ std::make_shared<ngraph::opset3::Gather>(
+ data_shape,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+ }
+ if (!gather_setup.index_shape.empty())
+ output_dims.push_back(dims);
+ if (gather_setup.first_split_point + 1 < gather_setup.data_shape.size()) {
+ std::vector<int64_t> idxs(gather_setup.data_shape.size() - gather_setup.second_split_point);
+ std::iota(idxs.begin(), idxs.end(), gather_setup.second_split_point);
+ output_dims.push_back(
+ std::make_shared<ngraph::opset3::Gather>(
+ data_shape,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+ }
+ const auto output_shape = std::make_shared<ngraph::opset3::Concat>(output_dims, 0);
+ const auto dsr1 = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(node, output_shape);
+ return std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{dsr1},
+ ngraph::ParameterVector{data, dims, indices},
+ "Expected");
+ }
+};
+
+TEST_P(DynamicToStaticShapeGatherIdxDSR, CompareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeGatherIdxDSR, combinations);
+
+// Gather with DSRs on *both* the data and the indices inputs.
+class DynamicToStaticShapeGather : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<std::tuple<DataType, DataType, GatherTestCase>> {
+public:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& data_type = std::get<0>(parameters);
+ const auto& idx_type = std::get<1>(parameters);
+ const auto& gather_setup = std::get<2>(parameters);
+
+ ngraph::helpers::CompareFunctions(*transform(data_type, idx_type, gather_setup),
+ *reference(data_type, idx_type, gather_setup));
+ }
+
+protected:
+ // "Actual": Gather(DSR(data), DSR(indices), axis) run through the pass.
+ std::shared_ptr<const ngraph::Function> transform(
+ const ngraph::element::Type_t& data_type,
+ const ngraph::element::Type_t& idx_type,
+ const GatherTestCase& gather_setup) const {
+ const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+ const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+ const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+ const auto data_dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.data_shape.size()});
+ const auto indices_dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.index_shape.size()});
+
+ const auto data_dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, data_dims);
+ const auto indices_dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(indices, indices_dims);
+ const auto node = std::make_shared<ngraph::opset3::Gather>(data_dsr, indices_dsr, axis);
+
+ auto outputShape = node->get_output_partial_shape(0);
+ const auto function = std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{node},
+ ngraph::ParameterVector{data, data_dims, indices, indices_dims},
+ "Actual");
+ // Gather output rank = data rank + indices rank - 1; element type follows the data input.
+ node->set_output_type(0, node->get_input_element_type(0), ngraph::PartialShape::dynamic(
+ gather_setup.data_shape.size() + gather_setup.index_shape.size() - 1));
+
+ const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeGather}};
+ vpu::DynamicToStaticShape(transformations).transform(function);
+ return function;
+ }
+
+ // "Expected": output shape is stitched from dynamic data dims before the
+ // split point, the dynamic indices dims, and dynamic data dims after it.
+ std::shared_ptr<const ngraph::Function> reference(
+ const ngraph::element::Type_t& data_type,
+ const ngraph::element::Type_t& idx_type,
+ const GatherTestCase& gather_setup) const {
+ const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+ const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+ const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+ const auto data_dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.data_shape.size()});
+ const auto indices_dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.index_shape.size()});
+
+ const auto data_dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, data_dims);
+ const auto indices_dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(indices, indices_dims);
+ const auto node = std::make_shared<ngraph::opset3::Gather>(data_dsr, indices_dsr, axis);
+
+ ngraph::OutputVector output_dims;
+ if (gather_setup.first_split_point) {
+ std::vector<int64_t> idxs(gather_setup.first_split_point);
+ std::iota(idxs.begin(), idxs.end(), 0);
+ output_dims.push_back(
+ std::make_shared<ngraph::opset3::Gather>(
+ data_dims,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+ }
+ if (!gather_setup.index_shape.empty())
+ output_dims.push_back(indices_dims);
+ if (gather_setup.first_split_point + 1 < gather_setup.data_shape.size()) {
+ std::vector<int64_t> idxs(gather_setup.data_shape.size() - gather_setup.second_split_point);
+ std::iota(idxs.begin(), idxs.end(), gather_setup.second_split_point);
+ output_dims.push_back(
+ std::make_shared<ngraph::opset3::Gather>(
+ data_dims,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+ }
+ const auto output_shape = std::make_shared<ngraph::opset3::Concat>(output_dims, 0);
+ const auto dsr1 = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(node, output_shape);
+ return std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{dsr1},
+ ngraph::ParameterVector{data, data_dims, indices, indices_dims},
+ "Expected");
+ }
+};
+
+TEST_P(DynamicToStaticShapeGather, CompareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeGather, combinations);
+
+} // namespace
node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticNonMaxSuppression}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
using TensorType = ngraph::element::Type_t;
using TensorShape = ngraph::Shape;
-class DynamicToStaticShapeNonZeroTests : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<std::tuple<TensorType, TensorShape>> {
+typedef std::tuple<
+ TensorType, // input type
+ TensorShape, // input shape
+ TensorType // output type
+> dynamicToStaticShapeNonZeroTestParams;
+
+class DynamicToStaticShapeNonZeroTests : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<dynamicToStaticShapeNonZeroTestParams> {
public:
void prepareFunctions() {
const auto& parameters = GetParam();
- const auto& tensorType = std::get<0>(parameters);
- const auto& tensorShape = std::get<1>(parameters);
+ const auto& inputType = std::get<0>(parameters);
+ const auto& inputShape = std::get<1>(parameters);
+ const auto& resultType = std::get<2>(parameters);
// Create a function with only op::NonZero
// And then run conversion pass
{
- const auto input = std::make_shared<ngraph::opset3::Parameter>(tensorType, tensorShape);
+ const auto input = std::make_shared<ngraph::opset3::Parameter>(inputType, inputShape);
- const auto nonZero = std::make_shared<ngraph::opset3::NonZero>(input);
+ const auto nonZero = std::make_shared<ngraph::opset3::NonZero>(input, resultType);
nonZero->set_friendly_name(s_FriendlyName);
actual = std::make_shared<ngraph::Function>(ngraph::NodeVector{nonZero}, ngraph::ParameterVector{input});
const auto transformation = vpu::Transformations{{ngraph::opset3::NonZero::type_info, vpu::dynamicToStaticShapeNonZero}};
- vpu::DynamicToStaticShape(transformation).transform(*actual);
+ vpu::DynamicToStaticShape(transformation).transform(actual);
}
// Create a reference function
{
- const auto input = std::make_shared<ngraph::opset1::Parameter>(tensorType, tensorShape);
+ const auto input = std::make_shared<ngraph::opset1::Parameter>(inputType, inputShape);
- const auto staticShapeNonZero = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(input);
+ const auto staticShapeNonZero = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(input, resultType);
staticShapeNonZero->set_friendly_name(std::string(s_FriendlyName) + "/static_shape");
const auto dynamicShapeResolver = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
staticShapeNonZero->output(0), staticShapeNonZero->output(1));
TensorShape{1000},
TensorShape{4, 1000},
TensorShape{3, 128, 256},
- TensorShape{2, 3, 128, 256})
+ TensorShape{2, 3, 128, 256}),
+ testing::Values(
+ ngraph::element::i32,
+ ngraph::element::i64)
));
} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_reshape.hpp"
+#include "vpu/ngraph/operations/out_shape_of_reshape.hpp"
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+
+#include <ngraph_functions/utils/ngraph_helpers.hpp>
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+
+#include <common_test_utils/test_common.hpp>
+#include <gtest/gtest.h>
+
+#include <string>
+#include <memory>
+#include <map>
+#include <vector>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape.hpp>
+
+namespace {
+
+using DataType = ngraph::element::Type;
+using DataShape = ngraph::Shape;
+using TestParams = std::tuple<DataShape, DataType>;
+
+// Tests dynamicToStaticShapeReshape: a Reshape over a DSR input becomes a
+// static Reshape followed by a DSR whose shape input is an OutShapeOfReshape
+// computed from the dynamic input dims and the reshape pattern.
+class DynamicToStaticShapeReshapeTests
+ : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<TestParams> {
+public:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& inDataShape = std::get<0>(parameters);
+ const auto& inDataType = std::get<1>(parameters);
+
+ ngraph::helpers::CompareFunctions(
+ *transform(inDataType, inDataShape),
+ *reference(inDataType, inDataShape));
+ }
+
+protected:
+ // "Actual": Reshape(DSR(data), const pattern) with the pass applied.
+ // The reshape pattern is the input shape itself, so the op is a no-op
+ // shape-wise; the test only checks the graph rewrite.
+ std::shared_ptr<const ngraph::Function> transform(
+ const ngraph::element::Type& inDataType,
+ const ngraph::Shape& inDataShape) const {
+ const auto inDataParam = std::make_shared<ngraph::op::Parameter>(
+ inDataType, inDataShape);
+ const auto inDataDimsParam = std::make_shared<ngraph::op::Parameter>(
+ ngraph::element::i64, ngraph::Shape{inDataShape.size()});
+ const auto outShapeDescriptorParam = std::make_shared<ngraph::op::Constant>(
+ ngraph::element::i64, ngraph::Shape{inDataShape.size()}, inDataShape);
+
+ const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ inDataParam, inDataDimsParam);
+ const auto reshape = std::make_shared<ngraph::op::v1::Reshape>(
+ dsr, outShapeDescriptorParam, true);
+
+ auto function = std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{reshape},
+ ngraph::ParameterVector{inDataParam, inDataDimsParam},
+ "Actual");
+ // Force the Reshape output to be rank-only dynamic so the pass has
+ // dynamism to resolve; done after the Function is built, as in sibling tests.
+ reshape->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(
+ outShapeDescriptorParam->get_output_partial_shape(0).rank()));
+
+ const auto transformations = vpu::Transformations{{
+ ngraph::op::v1::Reshape::type_info, vpu::dynamicToStaticShapeReshape}};
+ vpu::DynamicToStaticShape(transformations).transform(function);
+ return function;
+ }
+
+ // "Expected": static Reshape plus a DSR fed by OutShapeOfReshape.
+ std::shared_ptr<const ngraph::Function> reference(
+ const ngraph::element::Type& inDataType,
+ const ngraph::Shape& inDataShape) const {
+ const auto inDataParam = std::make_shared<ngraph::op::Parameter>(
+ inDataType, inDataShape);
+ const auto inDataDimsParam = std::make_shared<ngraph::op::Parameter>(
+ ngraph::element::i64, ngraph::Shape{inDataShape.size()});
+ const auto outShapeDescriptorParam = std::make_shared<ngraph::op::Constant>(
+ ngraph::element::i64, ngraph::Shape{inDataShape.size()}, inDataShape);
+
+ const auto dsr0 = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ inDataParam, inDataDimsParam);
+ const auto reshape = std::make_shared<ngraph::op::v1::Reshape>(
+ dsr0, outShapeDescriptorParam, true);
+
+ // OutShapeOfReshape recomputes the output dims from the dynamic input
+ // dims and the reshape pattern (last arg mirrors special_zero=true).
+ const auto outShapeOfReshape = std::make_shared<ngraph::vpu::op::OutShapeOfReshape>(
+ inDataDimsParam, outShapeDescriptorParam, true);
+ const auto dsr1 = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ reshape, outShapeOfReshape);
+ return std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{dsr1},
+ ngraph::ParameterVector{inDataParam, inDataDimsParam},
+ "Expected");
+ }
+};
+
+TEST_P(DynamicToStaticShapeReshapeTests, compareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeReshapeTests, testing::Combine(
+ testing::Values(
+ DataShape{4, 1000},
+ DataShape{3, 128, 256},
+ DataShape{2, 3, 128, 256}),
+ testing::Values(
+ ngraph::element::f16,
+ ngraph::element::f32,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u8)
+));
+
+} // namespace
node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeROIAlign}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeROIAlign}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
node->set_output_type(0, data_dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeROIAlign}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{scatter_setup.scatter_type_info, vpu::dynamicToStaticUnaryElementwise}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_shapeof.hpp"
+
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape.hpp>
+
+#include <ngraph_functions/utils/ngraph_helpers.hpp>
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset3.hpp>
+
+#include <common_test_utils/test_common.hpp>
+#include <gtest/gtest.h>
+
+
+namespace {
+
+using TensorType = ngraph::element::Type_t;
+using TensorShape = ngraph::Shape;
+
+// Case 1: ShapeOf is the only consumer of the DSR output.
+// After DynamicToStaticShapeShapeOf runs, the ShapeOf(DSR(data, shape))
+// sub-graph is expected to collapse into the shape parameter itself,
+// removing both the DSR node and the data parameter from the graph.
+class DynamicToStaticShapeShapeOfRemoveDSR : public CommonTestUtils::TestsCommon,
+        public testing::WithParamInterface<std::tuple<TensorType, TensorShape>> {
+public:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& data_type = std::get<0>(parameters);
+        const auto& data_shape = std::get<1>(parameters);
+
+        // Compare the transformed graph against a hand-built reference graph.
+        ngraph::helpers::CompareFunctions(*transform(data_type, data_shape),
+                *reference(data_type, data_shape));
+    }
+
+protected:
+    // Builds ShapeOf(DSR(data, shape)) and applies the pass to it.
+    std::shared_ptr<const ngraph::Function> transform(
+            const TensorType& dataType,
+            const TensorShape& dataShape) const {
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(dataType, dataShape);
+        const auto shape = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{dataShape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, shape);
+        const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(dsr->output(0));
+
+        const auto function = std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{shapeOf},
+                ngraph::ParameterVector{data, shape},
+                "Actual");
+
+        vpu::DynamicToStaticShapeShapeOf().run_on_function(function);
+        return function;
+    }
+
+    // Expected result: the shape parameter is forwarded straight to the output.
+    std::shared_ptr<const ngraph::Function> reference(
+            const TensorType& dataType,
+            const TensorShape& dataShape) const {
+        const auto shape = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{dataShape.size()});
+
+        return std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{shape},
+                ngraph::ParameterVector{shape},
+                "Expected");
+    }
+};
+
+TEST_P(DynamicToStaticShapeShapeOfRemoveDSR, CompareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeShapeOfRemoveDSR, testing::Combine(
+        testing::Values(
+                ngraph::element::f16,
+                ngraph::element::f32,
+                ngraph::element::i32,
+                ngraph::element::i64,
+                ngraph::element::u8),
+        testing::Values(
+                TensorShape{1000},
+                TensorShape{4, 1000},
+                TensorShape{3, 128, 256},
+                TensorShape{2, 3, 128, 256})
+));
+
+// Case 2: the ShapeOf result feeds further computation (a Relu).
+// The ShapeOf itself is still expected to be replaced by the shape parameter,
+// with the consumer re-attached to that parameter.
+class DynamicToStaticShapeShapeOfWithOutRemoveDSR : public CommonTestUtils::TestsCommon,
+        public testing::WithParamInterface<std::tuple<TensorType, TensorShape>> {
+public:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& data_type = std::get<0>(parameters);
+        const auto& data_shape = std::get<1>(parameters);
+
+        ngraph::helpers::CompareFunctions(*transform(data_type, data_shape),
+                *reference(data_type, data_shape));
+    }
+
+protected:
+    // Builds Relu(ShapeOf(DSR(data, shape))) and applies the pass to it.
+    std::shared_ptr<const ngraph::Function> transform(
+            const TensorType& dataType,
+            const TensorShape& dataShape) const {
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(dataType, dataShape);
+        const auto shape = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{dataShape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, shape);
+        const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(dsr->output(0));
+        const auto shapeOfOutputRelu = std::make_shared<ngraph::opset3::Relu>(shapeOf->output(0));
+
+        const auto function = std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{shapeOfOutputRelu},
+                ngraph::ParameterVector{data, shape},
+                "Actual");
+
+        vpu::DynamicToStaticShapeShapeOf().run_on_function(function);
+        return function;
+    }
+
+    // Expected result: Relu consumes the shape parameter directly.
+    std::shared_ptr<const ngraph::Function> reference(
+            const TensorType& dataType,
+            const TensorShape& dataShape) const {
+        const auto shape = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{dataShape.size()});
+
+        const auto shapeRelu = std::make_shared<ngraph::opset3::Relu>(shape);
+
+        return std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{shapeRelu},
+                ngraph::ParameterVector{shape},
+                "Expected");
+    }
+};
+
+TEST_P(DynamicToStaticShapeShapeOfWithOutRemoveDSR, CompareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeShapeOfWithOutRemoveDSR, testing::Combine(
+        testing::Values(
+                ngraph::element::f16,
+                ngraph::element::f32,
+                ngraph::element::i32,
+                ngraph::element::i64,
+                ngraph::element::u8),
+        testing::Values(
+                TensorShape{1000},
+                TensorShape{4, 1000},
+                TensorShape{3, 128, 256},
+                TensorShape{2, 3, 128, 256})
+));
+
+// Case 3: the DSR output has a non-ShapeOf consumer, so the DSR must be kept.
+// NOTE(review): shapeOf and shapeOfOutputRelu below are not reachable from the
+// function's results (only dsrOutputRelu is), so run_on_function may never
+// visit the ShapeOf branch — confirm this is the intended graph shape.
+class DynamicToStaticShapeShapeOfKeepDSR : public CommonTestUtils::TestsCommon,
+        public testing::WithParamInterface<std::tuple<TensorType, TensorShape>> {
+public:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& data_type = std::get<0>(parameters);
+        const auto& data_shape = std::get<1>(parameters);
+
+        ngraph::helpers::CompareFunctions(*transform(data_type, data_shape),
+                *reference(data_type, data_shape));
+    }
+
+protected:
+    // Builds DSR with both a ShapeOf consumer and a Relu consumer; only the
+    // Relu branch is registered as the function result.
+    std::shared_ptr<const ngraph::Function> transform(
+            const TensorType& dataType,
+            const TensorShape& dataShape) const {
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(dataType, dataShape);
+        const auto shape = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{dataShape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, shape);
+        const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(dsr->output(0));
+        const auto dsrOutputRelu = std::make_shared<ngraph::opset3::Relu>(dsr->output(0));
+        const auto shapeOfOutputRelu = std::make_shared<ngraph::opset3::Relu>(shapeOf->output(0));
+
+        const auto function = std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{dsrOutputRelu},
+                ngraph::ParameterVector{data, shape},
+                "Actual");
+
+        vpu::DynamicToStaticShapeShapeOf().run_on_function(function);
+        return function;
+    }
+
+    // Expected result: the DSR node survives; Relu still consumes its output.
+    std::shared_ptr<const ngraph::Function> reference(
+            const TensorType& dataType,
+            const TensorShape& dataShape) const {
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(dataType, dataShape);
+        const auto shape = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{dataShape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, shape);
+        const auto shapeRelu = std::make_shared<ngraph::opset3::Relu>(shape);
+        const auto dsrOutputRelu = std::make_shared<ngraph::opset3::Relu>(dsr->output(0));
+
+        return std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{dsrOutputRelu},
+                ngraph::ParameterVector{data, shape},
+                "Expected");
+    }
+};
+
+TEST_P(DynamicToStaticShapeShapeOfKeepDSR, CompareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeShapeOfKeepDSR, testing::Combine(
+        testing::Values(
+                ngraph::element::f16,
+                ngraph::element::f32,
+                ngraph::element::i32,
+                ngraph::element::i64,
+                ngraph::element::u8),
+        testing::Values(
+                TensorShape{1000},
+                TensorShape{4, 1000},
+                TensorShape{3, 128, 256},
+                TensorShape{2, 3, 128, 256})
+));
+
+} // namespace
node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(node->get_output_partial_shape(0).rank()));
const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeSqueeze}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
transpose->set_output_type(0, dsr->get_input_element_type(0), makeDynamicShape(transposition->get_output_partial_shape(0)));
const auto transformations = vpu::Transformations{{ngraph::opset3::Transpose::type_info, vpu::dynamicToStaticShapeTranspose}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{type_info, vpu::dynamicToStaticUnaryElementwise}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
ngraph::PartialShape::dynamic(node->get_output_partial_shape(0).rank() + unsqueeze_axes.size()));
const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeUnsqueeze}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <common_test_utils/test_common.hpp>
+#include <ngraph/shape.hpp>
+#include <ngraph/type/element_type.hpp>
+#include <ngraph/op/parameter.hpp>
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+#include <numeric>
+#include <random>
+#include <ngraph/opsets/opset3.hpp>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape_variadic_split.hpp>
+#include <queue>
+#include <ngraph_functions/utils/ngraph_helpers.hpp>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape.hpp>
+#include <vpu/utils/error.hpp>
+
+namespace {
+
+using DataType = ngraph::element::Type_t;
+using DataDims = ngraph::Shape;
+
+// One VariadicSplit scenario: the static data shape, the requested split
+// lengths, the split axis, and the (normalized) dimension indices that bound
+// the split axis — used by the reference graph to gather the prefix/suffix of
+// the dynamic dims around the split dimension.
+struct VariadicSplitTestCase {
+    ngraph::Shape data_shape;
+    std::vector<int64_t> split_lengths;
+    int64_t axis, first_split_point, second_split_point;
+};
+
+const auto combinations = testing::Combine(
+        testing::Values(
+                ngraph::element::f16,
+                ngraph::element::f32,
+                ngraph::element::i32,
+                ngraph::element::i64,
+                ngraph::element::u8),
+        testing::Values(
+                ngraph::element::i32,
+                ngraph::element::i64),
+        testing::Values(
+                VariadicSplitTestCase{{6}, {2, 1, 2, 1}, 0, 0, 0},
+                VariadicSplitTestCase{{6, 12, 10, 24}, {1, 1, 3, 1}, 0, 0, 1},
+                VariadicSplitTestCase{{6, 12}, {7, 2, 1, 2}, 1, 1, 2},
+                VariadicSplitTestCase{{6, 12, 10, 24}, {10, 14}, 3, 3, 4},
+                VariadicSplitTestCase{{6, 12, 10, 24}, {14, 10}, -1, 3, 4},
+                VariadicSplitTestCase{{6, 12, 10, 24}, {6}, -4, 0, 1}));
+
+
+// Verifies that vpu::dynamicToStaticShapeVariadicSplit rewrites
+// VariadicSplit(DSR(data, dims), axis, split_lengths) so that every split
+// output is wrapped in a DSR carrying the statically-computed output shape.
+class DynamicToStaticShapeVariadicSplit : public CommonTestUtils::TestsCommon,
+        public testing::WithParamInterface<std::tuple<DataType, DataType, VariadicSplitTestCase>> {
+public:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& data_type = std::get<0>(parameters);
+        const auto& idx_type = std::get<1>(parameters);
+        const auto& variadic_split_setup = std::get<2>(parameters);
+
+        // Compare the transformed graph against a hand-built reference graph.
+        ngraph::helpers::CompareFunctions(*transform(data_type, idx_type, variadic_split_setup),
+                *reference(data_type, idx_type, variadic_split_setup));
+    }
+
+protected:
+    // Builds the dynamic VariadicSplit graph and applies the transformation.
+    std::shared_ptr<const ngraph::Function> transform(
+            const ngraph::element::Type_t& data_type,
+            const ngraph::element::Type_t& idx_type,
+            const VariadicSplitTestCase& variadic_split_setup) const {
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, variadic_split_setup.data_shape);
+        const auto axis = ngraph::opset3::Constant::create(idx_type, {}, std::vector<int64_t>{variadic_split_setup.axis});
+        const auto split_lengths = ngraph::opset3::Constant::create(idx_type,
+                {variadic_split_setup.split_lengths.size()}, std::vector<int64_t>{variadic_split_setup.split_lengths});
+
+        const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{variadic_split_setup.data_shape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, dims);
+        const auto node = std::make_shared<ngraph::opset3::VariadicSplit>(dsr, axis, split_lengths);
+
+        // Concat is only a test workaround to give the multi-output split a
+        // single function result.
+        const auto tests_wa = std::make_shared<ngraph::opset3::Concat>(node->outputs(), variadic_split_setup.axis);
+
+        const auto function = std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{tests_wa},
+                ngraph::ParameterVector{data, dims},
+                "Actual");
+        // Mark the split output as fully dynamic so the transformation has
+        // something to resolve.
+        node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(variadic_split_setup.data_shape.size()));
+
+        const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeVariadicSplit}};
+        vpu::DynamicToStaticShape(transformations).transform(function);
+        return function;
+    }
+
+    // Builds the expected graph: per-output DSRs whose shape input is the
+    // concatenation of the unchanged leading dims, the split length, and the
+    // unchanged trailing dims.
+    std::shared_ptr<const ngraph::Function> reference(
+            const ngraph::element::Type_t& data_type,
+            const ngraph::element::Type_t& idx_type,
+            const VariadicSplitTestCase& variadic_split_setup) const {
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, variadic_split_setup.data_shape);
+        const auto axis = ngraph::opset3::Constant::create(idx_type, {}, std::vector<int64_t>{variadic_split_setup.axis});
+        const auto split_lengths = ngraph::opset3::Constant::create(idx_type,
+                {variadic_split_setup.split_lengths.size()}, std::vector<int64_t>{variadic_split_setup.split_lengths});
+
+        const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{variadic_split_setup.data_shape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, dims);
+        const auto node = std::make_shared<ngraph::opset3::VariadicSplit>(dsr, axis, split_lengths);
+
+        // Gather the dynamic dims before and after the split axis.
+        ngraph::OutputVector first_shape_part, second_shape_part;
+        if (variadic_split_setup.first_split_point) {
+            std::vector<int64_t> idxs(variadic_split_setup.first_split_point);
+            std::iota(idxs.begin(), idxs.end(), 0);
+            first_shape_part.push_back(
+                    std::make_shared<ngraph::opset3::Gather>(
+                            dims,
+                            ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+                            ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+        }
+        if (variadic_split_setup.first_split_point + 1 < variadic_split_setup.data_shape.size()) {
+            std::vector<int64_t> idxs(variadic_split_setup.data_shape.size() - variadic_split_setup.second_split_point);
+            std::iota(idxs.begin(), idxs.end(), variadic_split_setup.second_split_point);
+            second_shape_part.push_back(
+                    std::make_shared<ngraph::opset3::Gather>(
+                            dims,
+                            ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+                            ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+        }
+        ngraph::NodeVector results;
+        for (size_t i = 0; i < variadic_split_setup.split_lengths.size(); ++i) {
+            const auto dim = ngraph::opset3::Constant::create(dims->get_element_type(), {1}, {variadic_split_setup.split_lengths[i]});
+            if (!first_shape_part.empty() || !second_shape_part.empty()) {
+                ngraph::OutputVector output_dims{dim};
+                output_dims.insert(output_dims.begin(), first_shape_part.begin(), first_shape_part.end());
+                output_dims.insert(output_dims.end(), second_shape_part.begin(), second_shape_part.end());
+                const auto output_shape = std::make_shared<ngraph::opset3::Concat>(output_dims, 0);
+                results.push_back(std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(node->output(i), output_shape));
+            } else {
+                // 1-D case: the output shape is just the split length.
+                results.push_back(std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(node->output(i), dim));
+            }
+        }
+        const auto tests_wa = std::make_shared<ngraph::opset3::Concat>(results, variadic_split_setup.axis);
+
+        return std::make_shared<ngraph::Function>(
+                tests_wa,
+                ngraph::ParameterVector{data, dims},
+                "Expected");
+    }
+};
+
+TEST_P(DynamicToStaticShapeVariadicSplit, CompareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeVariadicSplit, combinations);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <behavior/core_threading_tests.hpp>
+
+namespace {
+
+// Device/config pairs under test: plain MYRIAD (with perf counters on),
+// plus MYRIAD reached through the HETERO and MULTI meta-devices.
+Params params[] = {
+    std::tuple<Device, Config> { "MYRIAD", { { CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(YES) } } },
+    std::tuple<Device, Config> { "HETERO", { { "TARGET_FALLBACK", "MYRIAD" } } },
+    std::tuple<Device, Config> { "MULTI", { { MULTI_CONFIG_KEY(DEVICE_PRIORITIES) , "MYRIAD" } } }
+};
+
+} // namespace
+
+INSTANTIATE_TEST_CASE_P(MYRIAD, CoreThreadingTests, testing::ValuesIn(params));
+
+// Iteration-based threading tests are disabled for MYRIAD (DISABLED_ prefix).
+INSTANTIATE_TEST_CASE_P(DISABLED_MYRIAD, CoreThreadingTestsWithIterations,
+    testing::Combine(testing::ValuesIn(params),
+                     testing::Values(2),
+                     testing::Values(2)));
using namespace LayerTestsDefinitions;
namespace {
-// TODO: All concat on axis 0 always fails by accuracy
-std::vector<size_t > axes = {1, 2, 3};
+std::vector<size_t> axes = {0, 1, 2, 3};
std::vector<std::vector<std::vector<size_t>>> inShapes = {
{{10, 10, 10, 10}, {10, 10, 10, 10}},
{{10, 10, 10, 10}, {10, 10, 10, 10}, {10, 10, 10, 10}},
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP16};
-INSTANTIATE_TEST_CASE_P(Axis_1_and_3, ConcatLayerTest,
+INSTANTIATE_TEST_CASE_P(Concat_Basic, ConcatLayerTest,
::testing::Combine(
- ::testing::Values(1, 3),
+ ::testing::ValuesIn(axes),
::testing::ValuesIn(inShapes),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),
ConcatLayerTest::getTestCaseName);
-
-
-// TODO: concat on axis 2 fails by accuracy with input precision different from FP16
-INSTANTIATE_TEST_CASE_P(Axis_2, ConcatLayerTest,
- ::testing::Combine(
- ::testing::Values(2),
- ::testing::ValuesIn(inShapes),
- ::testing::ValuesIn(netPrecisions),
- ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),
- ConcatLayerTest::getTestCaseName);
-} // namespace
\ No newline at end of file
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/proposal.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace ngraph::helpers;
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+/* ============= Proposal ============= */
+// Single-value attribute sets for the Proposal layer; the cross product of
+// these (via testing::Combine below) currently yields one configuration.
+const std::vector<base_size_type> base_size_ = {16};
+const std::vector<pre_nms_topn_type> pre_nms_topn_ = {100};
+const std::vector<post_nms_topn_type> post_nms_topn_ = {100};
+const std::vector<nms_thresh_type> nms_thresh_ = {0.7f};
+const std::vector<min_size_type> min_size_ = {1};
+const std::vector<ratio_type> ratio_ = {{1.0f, 2.0f}};
+const std::vector<scale_type> scale_ = {{1.2f, 1.5f}};
+const std::vector<clip_before_nms_type> clip_before_nms_ = {false};
+const std::vector<clip_after_nms_type> clip_after_nms_ = {false};
+
+// empty string corresponds to Caffe framework
+// Myriad plugin does not take this parameter; uses "" by default
+const std::vector<framework_type> framework_ = {""};
+
+const auto proposalParams = ::testing::Combine(
+    ::testing::ValuesIn(base_size_),
+    ::testing::ValuesIn(pre_nms_topn_),
+    ::testing::ValuesIn(post_nms_topn_),
+    ::testing::ValuesIn(nms_thresh_),
+    ::testing::ValuesIn(min_size_),
+    ::testing::ValuesIn(ratio_),
+    ::testing::ValuesIn(scale_),
+    ::testing::ValuesIn(clip_before_nms_),
+    ::testing::ValuesIn(clip_after_nms_),
+    ::testing::ValuesIn(framework_)
+);
+
+INSTANTIATE_TEST_CASE_P(Proposal_tests, ProposalLayerTest,
+                        ::testing::Combine(
+                            proposalParams,
+                            ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),
+                        ProposalLayerTest::getTestCaseName
+);
+
+} // namespace
std::vector<std::string> disabledTestPatterns() {
return {
+ // Issue 26268
+ ".*ConcatLayerTest.*axis=0.*",
+ // Not supported activation types
".*ActivationLayerTest\\.CompareWithRefs/Tanh.*netPRC=FP32.*",
".*ActivationLayerTest\\.CompareWithRefs/Exp.*netPRC=FP32.*",
".*ActivationLayerTest\\.CompareWithRefs/Log.*netPRC=FP32.*",
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/operations/static_shape_nonzero.hpp"
+
+#include "vpu/private_plugin_config.hpp"
+
+#include <functional_test_utils/layer_test_utils.hpp>
+#include <functional_test_utils/blob_utils.hpp>
+#include <ngraph_functions/utils/ngraph_helpers.hpp>
+#include <precision_utils.h>
+#include <ngraph/opsets/opset3.hpp>
+
+#include <tuple>
+#include <vector>
+#include <string>
+#include <memory>
+#include <random>
+
+typedef std::tuple<
+        InferenceEngine::SizeVector, // Input shape
+        InferenceEngine::Precision,  // Input precision
+        LayerTestsUtils::TargetDevice // Device name
+> staticShapeNonZeroLayerTestParams;
+
+namespace LayerTestsDefinitions {
+
+// Accuracy test for the VPU StaticShapeNonZero op: runs the op on MYRIAD and
+// compares against the INTERPRETER reference backend via the custom Compare().
+class StaticShapeNonZeroLayerTest : public testing::WithParamInterface<staticShapeNonZeroLayerTestParams>,
+        public LayerTestsUtils::LayerTestsCommon {
+public:
+    // Builds a readable test name from the input shape, precision and device.
+    static std::string getTestCaseName(testing::TestParamInfo<staticShapeNonZeroLayerTestParams> obj) {
+        InferenceEngine::SizeVector inputShape;
+        InferenceEngine::Precision inputPrecision;
+        std::string targetDevice;
+        std::tie(inputShape, inputPrecision, targetDevice) = obj.param;
+
+        std::ostringstream result;
+        result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
+        result << "inPrc=" << inputPrecision.name() << "_";
+        result << "targetDevice=" << targetDevice;
+        return result.str();
+    }
+
+protected:
+    void SetUp() override {
+        // Compare against the nGraph interpreter rather than a plugin reference.
+        SetRefMode(LayerTestsUtils::RefMode::INTERPRETER);
+        configuration[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
+        configuration[VPU_CONFIG_KEY(DISABLE_REORDER)] = CONFIG_VALUE(YES);
+
+        InferenceEngine::SizeVector inputShape;
+        std::tie(inputShape, inPrc, targetDevice) = this->GetParam();
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
+
+        // StaticShapeNonZero has two outputs: output(0) the index tensor,
+        // output(1) the actual dims of the valid region.
+        const auto input = std::make_shared<ngraph::opset3::Parameter>(ngPrc, ngraph::Shape(inputShape));
+        const auto staticShapeNonZero = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(input, ngraph::element::i32);
+        ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(staticShapeNonZero->output(0)),
+                                     std::make_shared<ngraph::opset3::Result>(staticShapeNonZero->output(1))};
+        function = std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{input});
+        outPrc = InferenceEngine::Precision::I32;
+    }
+
+    // Fills the input with small values around zero so the blob contains a mix
+    // of zero and non-zero elements.
+    // NOTE(review): the float fill is used for all tested input precisions
+    // (U8/FP32/I32) — presumably blob_utils converts; confirm.
+    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override {
+        return FuncTestUtils::createAndFillBlobFloat(info.getTensorDesc(), 4, -2, 2);
+    }
+
+    // Custom comparison: first checks the reported dims match, then compares
+    // only the valid region of the index tensor (entries beyond the non-zero
+    // count are padding and ignored).
+    void Compare(const std::vector<std::vector<std::uint8_t>>& expectedOutput, const std::vector<InferenceEngine::Blob::Ptr>& actualOutputs) override {
+        const auto expectedIndicesPtr = reinterpret_cast<const int32_t*>(expectedOutput[0].data());
+        const auto expectedDimsPtr = reinterpret_cast<const int32_t*>(expectedOutput[1].data());
+
+        const auto actualIndices = actualOutputs[0];
+        const auto actualDims = actualOutputs[1];
+
+        const auto actualIndicesPtr = InferenceEngine::as<InferenceEngine::MemoryBlob>(actualIndices)->rmap().as<const int32_t*>();
+        const auto actualDimsPtr = InferenceEngine::as<InferenceEngine::MemoryBlob>(actualDims)->rmap().as<const int32_t*>();
+
+        ASSERT_EQ(expectedDimsPtr[0], actualDimsPtr[0]);
+        ASSERT_EQ(expectedDimsPtr[1], actualDimsPtr[1]);
+
+        // Row stride of the index tensor (its second static dimension).
+        const auto totalDimsSize = actualIndices->getTensorDesc().getDims()[1];
+
+        for (int axis = 0; axis < actualDimsPtr[1]; ++axis) {
+            for (int i = 0; i < actualDimsPtr[0]; ++i) {
+                const auto idx = i + axis * totalDimsSize;
+                ASSERT_EQ(expectedIndicesPtr[idx], actualIndicesPtr[idx]);
+            }
+        }
+    }
+};
+
+TEST_P(StaticShapeNonZeroLayerTest, accuracy) {
+    Run();
+}
+
+std::vector<InferenceEngine::SizeVector> inputDims = {
+        { 7 },
+        { 1000 },
+        { 3, 5 },
+        { 65, 33 },
+        { 33, 65 },
+        { 1, 1000 },
+        { 223, 217, 21 },
+        { 3, 4, 5, 1 },
+        { 3, 4, 1, 5, 1 }
+};
+
+std::vector<InferenceEngine::Precision> inputPrecisions = {
+        InferenceEngine::Precision::U8,
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::I32,
+};
+
+INSTANTIATE_TEST_CASE_P(accuracy, StaticShapeNonZeroLayerTest,
+                        ::testing::Combine(
+                            ::testing::ValuesIn(inputDims),
+                            ::testing::ValuesIn(inputPrecisions),
+                            ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)));
+
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <functional_test_utils/layer_test_utils.hpp>
+
+#include <ngraph_functions/builders.hpp>
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+
+namespace {
+
+using DataType = ngraph::element::Type;
+using DataShape = ngraph::Shape;
+using DataShapes = std::vector<DataShape>;
+
+// Shapes of all Concat inputs plus the concatenation axis.
+struct ConcatParam {
+    DataShapes dataShapes;
+    int axis;
+};
+using ConcatTestParam = std::tuple<DataType, ConcatParam, LayerTestsUtils::TargetDevice>;
+
+// Runs Concat over several dynamically-shaped inputs: each data input is
+// paired with a shape input through a DynamicShapeResolver before the Concat.
+class DSR_Concat
+        : public testing::WithParamInterface<ConcatTestParam>,
+          public LayerTestsUtils::LayerTestsCommon {
+protected:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& dataType = std::get<0>(parameters);
+        const auto& concatParam = std::get<1>(parameters);
+        targetDevice = std::get<2>(GetParam());
+
+        const auto& dataShapes = concatParam.dataShapes;
+        const auto& axis = concatParam.axis;
+
+        // For every input shape create a (data, shape) parameter pair and
+        // bind them with a DSR; the DSRs feed the Concat under test.
+        ngraph::NodeVector dsrVector;
+        ngraph::ParameterVector params;
+        for (const auto& dataShape : dataShapes) {
+            const auto param = std::make_shared<ngraph::opset3::Parameter>(
+                    dataType, dataShape);
+            const auto shape = std::make_shared<ngraph::opset3::Parameter>(
+                    ngraph::element::i64, ngraph::Shape{dataShape.size()});
+            dsrVector.emplace_back(std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+                    param, shape));
+            params.push_back(param);
+            params.push_back(shape);
+        }
+
+        const auto concat = std::make_shared<ngraph::opset3::Concat>(dsrVector, axis);
+        const auto result = std::make_shared<ngraph::opset3::Result>(concat);
+
+        function = std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{result}, params, "DSR-Concat");
+    }
+};
+
+TEST_P(DSR_Concat, CompareWithReference) {
+    Run();
+}
+
+std::vector<ngraph::element::Type> dataTypes = {
+        ngraph::element::f16,
+        ngraph::element::f32,
+        ngraph::element::i32,
+        ngraph::element::i64,
+        ngraph::element::u8,
+};
+
+std::vector<ConcatParam> concatParams = {
+        {DataShapes{DataShape{128}, DataShape{256}, DataShape{512}, DataShape{1024}}, 0},
+        {DataShapes{DataShape{1, 1000}, DataShape{2, 1000}, DataShape{4, 1000}, DataShape{8, 1000}}, 0},
+        {DataShapes{DataShape{128, 100}, DataShape{128, 200}, DataShape{128, 400}, DataShape{128, 800}}, 1},
+        {DataShapes{DataShape{3, 64, 128}, DataShape{4, 64, 128}, DataShape{5, 64, 128}}, 0},
+        {DataShapes{DataShape{3, 64, 128}, DataShape{3, 64, 256}, DataShape{3, 64, 512}}, 2},
+};
+
+// Disabled until dynamic Concat is supported by the plugin (DISABLED_ prefix).
+INSTANTIATE_TEST_CASE_P(DISABLED_DynamicConcat, DSR_Concat, ::testing::Combine(
+        ::testing::ValuesIn(dataTypes),
+        ::testing::ValuesIn(concatParams),
+        ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)));
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <functional_test_utils/layer_test_utils.hpp>
+#include <ngraph_functions/builders.hpp>
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+
+namespace {
+
+using DataType = ngraph::element::Type_t;
+using DataDims = ngraph::Shape;
+
+
+// One Gather scenario: static data/index shapes, the gather axis, and the
+// (normalized) dimension indices bounding the gather axis.
+struct GatherTestCase {
+    ngraph::Shape data_shape, index_shape;
+    int64_t axis, first_split_point, second_split_point;
+};
+
+const auto combinations = testing::Combine(
+        testing::Values(
+                ngraph::element::f16,
+                ngraph::element::f32,
+                ngraph::element::i32,
+                ngraph::element::i64,
+                ngraph::element::u8),
+        testing::Values(
+                ngraph::element::i32,
+                ngraph::element::i64,
+                ngraph::element::u8),
+        testing::Values(
+                GatherTestCase{{6}, {15, 4, 20, 28}, 0, 0, 0},
+                GatherTestCase{{6, 12, 10, 24}, {6}, 0, 0, 1},
+                GatherTestCase{{6, 12}, {15, 4, 20, 28}, 1, 1, 2},
+                GatherTestCase{{6, 12, 10, 24}, {15, 4, 20, 28}, 3, 3, 4},
+                GatherTestCase{{6, 12, 10, 24}, {15, 4, 20, 28}, -1, 3, 4},
+                GatherTestCase{{6, 12, 10, 24}, {15, 4, 20, 28}, -4, 0, 1}),
+        testing::Values(CommonTestUtils::DEVICE_MYRIAD));
+
+
+using Parameters = std::tuple<
+        DataType,
+        DataType,
+        GatherTestCase,
+        LayerTestsUtils::TargetDevice
+>;
+
+// Gather where only the DATA input is dynamic (wrapped in a DSR).
+class DSR_GatherData : public testing::WithParamInterface<Parameters>,
+        public LayerTestsUtils::LayerTestsCommon {
+protected:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& data_type = std::get<0>(parameters);
+        const auto& idx_type = std::get<1>(parameters);
+        const auto& gather_setup = std::get<2>(parameters);
+        targetDevice = std::get<3>(parameters);
+
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+        const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+        const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+        const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.data_shape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, dims);
+        const auto node = std::make_shared<ngraph::opset3::Gather>(dsr, indices, axis);
+
+        const auto result = std::make_shared<ngraph::opset3::Result>(node);
+        function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
+                ngraph::ParameterVector{data, indices, dims}, "DSR-GatherData");
+    }
+};
+
+TEST_P(DSR_GatherData, CompareWithReference) {
+    Run();
+}
+
+INSTANTIATE_TEST_CASE_P(DISABLED_DynamicGatherData, DSR_GatherData, combinations);
+
+// Gather where only the INDICES input is dynamic (wrapped in a DSR).
+class DSR_GatherIdx : public testing::WithParamInterface<Parameters>,
+        public LayerTestsUtils::LayerTestsCommon {
+protected:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& data_type = std::get<0>(parameters);
+        const auto& idx_type = std::get<1>(parameters);
+        const auto& gather_setup = std::get<2>(parameters);
+        targetDevice = std::get<3>(parameters);
+
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+        const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+        const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+        const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.index_shape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(indices, dims);
+        const auto node = std::make_shared<ngraph::opset3::Gather>(data, dsr, axis);
+
+        const auto result = std::make_shared<ngraph::opset3::Result>(node);
+        function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
+                ngraph::ParameterVector{data, indices, dims}, "DSR-GatherIdx");
+    }
+};
+
+TEST_P(DSR_GatherIdx, CompareWithReference) {
+    Run();
+}
+
+INSTANTIATE_TEST_CASE_P(DISABLED_DynamicGatherIdx, DSR_GatherIdx, combinations);
+
+// Gather where BOTH data and indices are dynamic (each behind its own DSR).
+class DSR_Gather : public testing::WithParamInterface<Parameters>,
+        public LayerTestsUtils::LayerTestsCommon {
+protected:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& data_type = std::get<0>(parameters);
+        const auto& idx_type = std::get<1>(parameters);
+        const auto& gather_setup = std::get<2>(parameters);
+        targetDevice = std::get<3>(parameters);
+
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+        const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+        const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+        const auto data_dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.data_shape.size()});
+        const auto indices_dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.index_shape.size()});
+
+        const auto data_dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, data_dims);
+        const auto indices_dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(indices, indices_dims);
+        const auto node = std::make_shared<ngraph::opset3::Gather>(data_dsr, indices_dsr, axis);
+
+        const auto result = std::make_shared<ngraph::opset3::Result>(node);
+        function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
+                ngraph::ParameterVector{data, indices, data_dims, indices_dims}, "DSR-Gather");
+    }
+};
+
+TEST_P(DSR_Gather, CompareWithReference) {
+    Run();
+}
+
+// Fixed copy-paste slip: this instantiation previously reused the
+// DISABLED_DynamicGatherIdx name from the DSR_GatherIdx suite above.
+INSTANTIATE_TEST_CASE_P(DISABLED_DynamicGather, DSR_Gather, combinations);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+
+#include <functional_test_utils/layer_test_utils.hpp>
+#include <ngraph_functions/builders.hpp>
+
+namespace {
+
+using DataType = ngraph::element::Type_t;
+using DataShape = ngraph::Shape;
+using ShapeDescriptor = std::vector<int32_t>;
+using ReshapeTestParams = std::tuple<DataShape, bool, ShapeDescriptor>;
+
+using Parameters = std::tuple<
+ DataType,
+ ReshapeTestParams,
+ LayerTestsUtils::TargetDevice
+>;
+
+class DSR_Reshape : public testing::WithParamInterface<Parameters>, public LayerTestsUtils::LayerTestsCommon {
+protected:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& inDataType = std::get<0>(GetParam());
+ const auto& reshapeTestParams = std::get<1>(GetParam());
+ targetDevice = std::get<2>(GetParam());
+
+ const auto& inDataShape = std::get<0>(reshapeTestParams);
+ const auto& specialZero = std::get<1>(reshapeTestParams);
+ const auto& outShapeDescriptor = std::get<2>(reshapeTestParams);
+
+ const auto inDataParam = std::make_shared<ngraph::op::Parameter>(
+ inDataType, inDataShape);
+ const auto inDataShapeParam = std::make_shared<ngraph::op::Parameter>(
+ ngraph::element::i32, ngraph::Shape{inDataShape.size()});
+ const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ inDataParam, inDataShapeParam);
+
+ const auto outShapeDescriptorConstNode = std::make_shared<ngraph::op::Constant>(
+ ngraph::element::i64, ngraph::Shape{outShapeDescriptor.size()}, outShapeDescriptor);
+ const auto reshape = std::make_shared<ngraph::op::v1::Reshape>(
+ dsr, outShapeDescriptorConstNode, specialZero);
+
+ const auto result = std::make_shared<ngraph::op::Result>(reshape);
+ function = std::make_shared<ngraph::Function>(
+ ngraph::ResultVector{result},
+ ngraph::ParameterVector{inDataParam, inDataShapeParam},
+ "DSR-Reshape");
+ }
+};
+
+TEST_P(DSR_Reshape, CompareWithReference) {
+ Run();
+}
+
+std::vector<ReshapeTestParams> reshapeTestParams = {
+ std::make_tuple(DataShape{1, 5, 5, 24}, true, ShapeDescriptor{0, -1, 4}),
+ std::make_tuple(DataShape{1, 5, 5, 0}, false, ShapeDescriptor{0, 4}),
+ std::make_tuple(DataShape{1, 3, 128, 256}, true, ShapeDescriptor{0, 0, 64, 512}),
+};
+
+INSTANTIATE_TEST_CASE_P(DISABLED_DynamicReshape, DSR_Reshape,
+ ::testing::Combine(
+ ::testing::Values(ngraph::element::f16, ngraph::element::f32, ngraph::element::i32),
+ ::testing::ValuesIn(reshapeTestParams),
+ ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)));
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <functional_test_utils/layer_test_utils.hpp>
+#include <ngraph_functions/builders.hpp>
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+
+namespace {
+
+using DataType = ngraph::element::Type_t;
+using DataDims = ngraph::Shape;
+
+
+struct VariadicSplitTestCase {
+ ngraph::Shape data_shape;
+ std::vector<int64_t> split_lengths;
+ int64_t axis, first_split_point, second_split_point;
+};
+
+const auto combinations = testing::Combine(
+ testing::Values(
+ ngraph::element::f16,
+ ngraph::element::f32,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u8),
+ testing::Values(
+ ngraph::element::i32,
+ ngraph::element::i64),
+ testing::Values(
+ VariadicSplitTestCase{{6}, {2, 1, 2, 1}, 0, 0, 0},
+ VariadicSplitTestCase{{6, 12, 10, 24}, {1, 1, 3, 1}, 0, 0, 1},
+ VariadicSplitTestCase{{6, 12}, {7, 2, 1, 2}, 1, 1, 2},
+ VariadicSplitTestCase{{6, 12, 10, 24}, {10, 14}, 3, 3, 4},
+ VariadicSplitTestCase{{6, 12, 10, 24}, {14, 10}, -1, 3, 4},
+ VariadicSplitTestCase{{6, 12, 10, 24}, {6}, -4, 0, 1}),
+ testing::Values(CommonTestUtils::DEVICE_MYRIAD));
+
+
+using Parameters = std::tuple<
+ DataType,
+ DataType,
+ VariadicSplitTestCase,
+ LayerTestsUtils::TargetDevice
+>;
+
+class DSR_VariadicSplit : public testing::WithParamInterface<Parameters>,
+ public LayerTestsUtils::LayerTestsCommon {
+protected:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& data_type = std::get<0>(parameters);
+ const auto& idx_type = std::get<1>(parameters);
+ const auto& variadic_split_setup = std::get<2>(parameters);
+ targetDevice = std::get<3>(parameters);
+
+ const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, variadic_split_setup.data_shape);
+ const auto axis = ngraph::opset3::Constant::create(idx_type, {}, std::vector<int64_t>{variadic_split_setup.axis});
+ const auto split_lengths = ngraph::opset3::Constant::create(idx_type,
+ {variadic_split_setup.split_lengths.size()}, std::vector<int64_t>{variadic_split_setup.split_lengths});
+
+ const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{variadic_split_setup.data_shape.size()});
+
+ const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, dims);
+ const auto node = std::make_shared<ngraph::opset3::VariadicSplit>(dsr, axis, split_lengths);
+
+ const auto tests_wa = std::make_shared<ngraph::opset3::Concat>(node->outputs(), variadic_split_setup.axis);
+ const auto result = std::make_shared<ngraph::opset3::Result>(tests_wa);
+ function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
+ ngraph::ParameterVector{data, dims}, "DSR-VariadicSplit");
+ }
+};
+
+TEST_P(DSR_VariadicSplit, CompareWithReference) {
+ Run();
+}
+
+INSTANTIATE_TEST_CASE_P(DISABLED_DynamicGatherData, DSR_VariadicSplit, combinations);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+
+#include <functional_test_utils/layer_test_utils.hpp>
+#include <ngraph_functions/builders.hpp>
+
+namespace {
+
+using TensorType = ngraph::element::Type;
+using TensorShape = ngraph::PartialShape;
+
+using BroadcastExplicitTestParams = std::tuple<
+ TensorType, TensorShape, LayerTestsUtils::TargetDevice>;
+
+class NonZero_Broadcast : public testing::WithParamInterface<BroadcastExplicitTestParams>,
+ public LayerTestsUtils::LayerTestsCommon {
+protected:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& tensorType = std::get<0>(parameters);
+ const auto& tensorShape = std::get<1>(parameters);
+ targetDevice = std::get<2>(GetParam());
+
+ const auto tensorParam = std::make_shared<ngraph::opset3::Parameter>(
+ tensorType, tensorShape);
+ const auto nonZero = std::make_shared<ngraph::opset3::NonZero>(tensorParam);
+ const auto shapeOfNonZero = std::make_shared<ngraph::opset3::ShapeOf>(nonZero);
+
+ const auto broadcastConstant = std::make_shared<ngraph::opset3::Constant>(
+ tensorType, ngraph::Shape{1}, 1);
+
+ const auto axesMappingConstant = std::make_shared<ngraph::opset3::Constant>(
+ ngraph::element::u64, ngraph::Shape{1}, 0);
+
+ const auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(
+ broadcastConstant, shapeOfNonZero, axesMappingConstant);
+
+ const auto result = std::make_shared<ngraph::opset3::Result>(broadcast);
+
+ function = std::make_shared<ngraph::Function>(
+ ngraph::ResultVector{result},
+ ngraph::ParameterVector{tensorParam},
+ "NonZero-Broadcast");
+ }
+};
+
+TEST_P(NonZero_Broadcast, CompareWithReference) {
+ Run();
+}
+// Blocked by #-30913, #-30915
+INSTANTIATE_TEST_CASE_P(DISABLED_DynamicBroadcast, NonZero_Broadcast, ::testing::Combine(
+ ::testing::Values(ngraph::element::f16, ngraph::element::f32, ngraph::element::i32),
+ ::testing::Values(
+ TensorShape{1000},
+ TensorShape{4, 1000},
+ TensorShape{3, 128, 256}),
+ ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)));
+
+} // namespace
#include <functional_test_utils/layer_test_utils.hpp>
#include <ngraph_functions/builders.hpp>
#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+#include <vpu/myriad_plugin_config.hpp>
namespace {
LayerTestsUtils::TargetDevice
>;
-class DSR_Transpose : public testing::WithParamInterface<Parameters>, public LayerTestsUtils::LayerTestsCommon {
+class NonZero_Transpose : public testing::WithParamInterface<Parameters>, public LayerTestsUtils::LayerTestsCommon {
protected:
void SetUp() override {
const auto& parameters = GetParam();
targetDevice = std::get<2>(GetParam());
const auto data = std::make_shared<ngraph::opset3::Parameter>(dataType, dataDims);
- const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{dataDims.size()});
- const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, dims);
+ const auto nonZero = std::make_shared<ngraph::opset3::NonZero>(data);
auto permutation = std::vector<std::int64_t>(dataDims.size());
std::iota(permutation.begin(), permutation.end(), 0);
std::shuffle(permutation.begin(), permutation.end(), std::mt19937());
const auto transposition = std::make_shared<ngraph::opset3::Constant>(ngraph::element::i64, ngraph::Shape{dataDims.size()}, permutation);
- const auto transpose = std::make_shared<ngraph::opset3::Transpose>(dsr, transposition);
+ const auto transpose = std::make_shared<ngraph::opset3::Transpose>(nonZero, transposition);
const auto result = std::make_shared<ngraph::opset3::Result>(transpose);
- function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{data, dims}, "DSR-Transpose");
+ function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{data}, "NonZero-Transpose");
}
};
-TEST_P(DSR_Transpose, CompareWithReference) {
- Run();
+TEST_P(NonZero_Transpose, CompareWithReference) {
+ SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+ configuration.emplace(VPU_MYRIAD_CONFIG_KEY(PLATFORM), VPU_MYRIAD_CONFIG_VALUE(2480));
+ ConfigurePlugin();
+
+ ASSERT_NO_THROW(LoadNetwork());
}
-INSTANTIATE_TEST_CASE_P(DISABLED_DynamicTranspose, DSR_Transpose,
+INSTANTIATE_TEST_CASE_P(DynamicTranspose, NonZero_Transpose,
::testing::Combine(
::testing::Values(ngraph::element::f16, ngraph::element::f32, ngraph::element::i32),
::testing::Values(ngraph::Shape{1, 800}),
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <ie_core.hpp>
+#include <details/ie_exception.hpp>
+#include <ie_plugin_config.hpp>
+#include <ie_extension.h>
+#include <multi-device/multi_device_config.hpp>
+
+#include <file_utils.h>
+#include <ngraph_functions/subgraph_builders.hpp>
+#include <functional_test_utils/test_model/test_model.hpp>
+#include <common_test_utils/file_utils.hpp>
+#include <common_test_utils/test_assertions.hpp>
+
+#include <gtest/gtest.h>
+#include <thread>
+#include <atomic>
+#include <mutex>
+#include <chrono>
+#include <fstream>
+
+using Device = std::string;
+using Config = std::map<std::string, std::string>;
+using Params = std::tuple<Device, Config>;
+
+class CoreThreadingTestsBase {
+public:
+ static void runParallel(std::function<void(void)> func,
+ const unsigned int iterations = 100,
+ const unsigned int threadsNum = 8) {
+ std::vector<std::thread> threads(threadsNum);
+
+ for (auto & thread : threads) {
+ thread = std::thread([&](){
+ for (unsigned int i = 0; i < iterations; ++i) {
+ func();
+ }
+ });
+ }
+
+ for (auto & thread : threads) {
+ if (thread.joinable())
+ thread.join();
+ }
+ }
+
+ void safePluginUnregister(InferenceEngine::Core & ie) {
+ try {
+ ie.UnregisterPlugin(deviceName);
+ } catch (const InferenceEngine::details::InferenceEngineException & ex) {
+ // if several threads unload plugin at once, the first thread does this
+ // while all others will throw an exception that plugin is not registered
+ ASSERT_STR_CONTAINS(ex.what(), "name is not registered in the");
+ }
+ }
+
+ void safeAddExtension(InferenceEngine::Core & ie) {
+ try {
+ auto extension = InferenceEngine::make_so_pointer<InferenceEngine::IExtension>(
+ FileUtils::makeSharedLibraryName<char>({}, "extension_tests"));
+ ie.AddExtension(extension);
+ } catch (const InferenceEngine::details::InferenceEngineException & ex) {
+ ASSERT_STR_CONTAINS(ex.what(), "name: experimental");
+ }
+ }
+
+ Device deviceName;
+ Config config;
+};
+
+//
+// Common threading plugin tests
+//
+
+class CoreThreadingTests : public CoreThreadingTestsBase,
+ public ::testing::TestWithParam<Params> {
+public:
+ void SetUp() override {
+ std::tie(deviceName, config) = GetParam();
+ }
+};
+
+// tested function: GetVersions, UnregisterPlugin
+TEST_P(CoreThreadingTests, smoke_GetVersions) {
+ InferenceEngine::Core ie;
+
+ runParallel([&] () {
+ auto versions = ie.GetVersions(deviceName);
+ ASSERT_LE(1u, versions.size());
+ safePluginUnregister(ie);
+ });
+}
+
+// tested function: SetConfig for already created plugins
+TEST_P(CoreThreadingTests, smoke_SetConfigPluginExists) {
+ InferenceEngine::Core ie;
+
+ ie.SetConfig(config);
+ auto versions = ie.GetVersions(deviceName);
+
+ runParallel([&] () {
+ ie.SetConfig(config);
+ }, 10000);
+}
+
+// tested function: GetConfig, UnregisterPlugin
+TEST_P(CoreThreadingTests, smoke_GetConfig) {
+ InferenceEngine::Core ie;
+ std::string configKey = config.begin()->first;
+
+ ie.SetConfig(config);
+ runParallel([&] () {
+ ie.GetConfig(deviceName, configKey);
+ safePluginUnregister(ie);
+ });
+}
+
+// tested function: GetMetric, UnregisterPlugin
+TEST_P(CoreThreadingTests, smoke_GetMetric) {
+ InferenceEngine::Core ie;
+ runParallel([&] () {
+ ie.GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
+ safePluginUnregister(ie);
+ });
+}
+
+// tested function: QueryNetwork
+TEST_P(CoreThreadingTests, smoke_QueryNetwork) {
+ InferenceEngine::Core ie;
+ auto model = FuncTestUtils::TestModel::convReluNormPoolFcModelFP32;
+ auto network = ie.ReadNetwork(model.model_xml_str, model.weights_blob);
+
+ ie.SetConfig(config, deviceName);
+ InferenceEngine::QueryNetworkResult refResult = ie.QueryNetwork(network, deviceName);
+
+ runParallel([&] () {
+ const auto result = ie.QueryNetwork(network, deviceName);
+ safePluginUnregister(ie);
+
+ // compare QueryNetworkResult with reference
+ for (auto && r : refResult.supportedLayersMap) {
+ ASSERT_NE(result.supportedLayersMap.end(), result.supportedLayersMap.find(r.first));
+ }
+ for (auto && r : result.supportedLayersMap) {
+ ASSERT_NE(refResult.supportedLayersMap.end(), refResult.supportedLayersMap.find(r.first));
+ }
+ }, 3000);
+}
+
+//
+// Parametrized tests with number of parallel threads, iterations
+//
+
+using Threads = unsigned int;
+using Iterations = unsigned int;
+
+class CoreThreadingTestsWithIterations : public ::testing::TestWithParam<std::tuple<Params, Threads, Iterations> >,
+ public CoreThreadingTestsBase {
+public:
+ void SetUp() override {
+ std::tie(deviceName, config) = std::get<0>(GetParam());
+ numThreads = std::get<1>(GetParam());
+ numIterations = std::get<2>(GetParam());
+ }
+
+ unsigned int numIterations;
+ unsigned int numThreads;
+};
+
+// tested function: LoadNetwork, AddExtension
+TEST_P(CoreThreadingTestsWithIterations, smoke_LoadNetwork) {
+ InferenceEngine::Core ie;
+ std::atomic<unsigned int> counter{0u};
+
+ const FuncTestUtils::TestModel::TestModel models[] = {
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP32,
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16
+ };
+ std::vector<InferenceEngine::CNNNetwork> networks;
+ for (auto & model : models) {
+ networks.emplace_back(ie.ReadNetwork(model.model_xml_str, model.weights_blob));
+ }
+
+ // TODO: uncomment after fixing *-31414
+ // networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::make2InputSubtract()));
+ // networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::makeMultiSingleConv()));
+ // networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::makeSingleConv()));
+ // networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::makeSplitConvConcat()));
+ // networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::makeSplitMultiConvConcat()));
+
+ ie.SetConfig(config, deviceName);
+ runParallel([&] () {
+ auto value = counter++;
+ (void)ie.LoadNetwork(networks[(counter++) % networks.size()], deviceName);
+ }, numIterations, numThreads);
+}
namespace LayerTestsDefinitions {
using concatParamsTuple = typename std::tuple<
+        //TODO: according to the specification, axis has to be int; negative values are allowed
size_t, // Concat axis
std::vector<std::vector<size_t>>, // Input shapes
InferenceEngine::Precision, // Network precision
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+
+namespace LayerTestsDefinitions {
+
+namespace proposalTypes {
+
+typedef size_t base_size_type;
+typedef size_t pre_nms_topn_type;
+typedef size_t post_nms_topn_type;
+typedef float nms_thresh_type;
+typedef size_t min_size_type;
+typedef std::vector<float> ratio_type;
+typedef std::vector<float> scale_type;
+typedef bool clip_before_nms_type;
+typedef bool clip_after_nms_type;
+typedef bool normalize_type;
+typedef size_t feat_stride_type;
+typedef float box_size_scale_type;
+typedef float box_coordinate_scale_type;
+typedef std::string framework_type;
+
+}; // namespace proposalTypes
+
+using namespace proposalTypes;
+
+typedef std::tuple<
+ base_size_type,
+ pre_nms_topn_type,
+ post_nms_topn_type,
+ nms_thresh_type,
+ min_size_type,
+ ratio_type,
+ scale_type,
+ clip_before_nms_type,
+ clip_after_nms_type,
+ framework_type> proposalSpecificParams;
+typedef std::tuple<
+ proposalSpecificParams,
+ std::string> proposalLayerTestParamsSet;
+
+class ProposalLayerTest
+ : public testing::WithParamInterface<proposalLayerTestParamsSet>,
+ public LayerTestsUtils::LayerTestsCommon {
+public:
+ static std::string getTestCaseName(testing::TestParamInfo<proposalLayerTestParamsSet> obj);
+ InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
+
+protected:
+ void SetUp() override;
+ void Validate() override;
+};
+
+} // namespace LayerTestsDefinitions
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
convertFuncToF32(fnPtr, netPrecision);
auto refOutData = ngraph::helpers::inferFnWithInterp<ngraph::element::Type_t::f32>(fnPtr, inRawData);
- auto thr = FuncTestUtils::GetComparisonThreshold(netPrecision);
+ float thr1, thr2;
+ FuncTestUtils::GetComparisonThreshold(netPrecision, thr1, thr2);
+
size_t outElementsCount = std::accumulate(begin(fnPtr->get_output_shape(0)), end(fnPtr->get_output_shape(0)), 1,
std::multiplies<size_t>());
- FuncTestUtils::compareRawBuffers(outBlob->cbuffer().as<float *>(), *refOutData[0], outElementsCount,
- outElementsCount,
- thr);
+ FuncTestUtils::compareRawBuffers(outBlob->cbuffer().as<float *>(), *refOutData[0],
+ outElementsCount, outElementsCount,
+ FuncTestUtils::CompareType::ABS_AND_REL,
+ thr1, thr2);
fnPtr.reset();
if (targetDevice.find(CommonTestUtils::DEVICE_GPU) != std::string::npos) {
PluginCache::get().reset();
namespace LayerTestsDefinitions {
std::string ConcatLayerTest::getTestCaseName(const testing::TestParamInfo<concatParamsTuple> &obj) {
- size_t axis;
+ int axis;
std::vector<std::vector<size_t>> inputShapes;
InferenceEngine::Precision netPrecision;
std::string targetName;
}
void ConcatLayerTest::SetUp() {
- size_t axis;
+ int axis;
std::vector<std::vector<size_t>> inputShape;
InferenceEngine::Precision netPrecision;
std::tie(axis, inputShape, netPrecision, targetDevice) = this->GetParam();
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+#include <functional>
+#include <functional_test_utils/skip_tests_config.hpp>
+
+#include "ie_core.hpp"
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+
+#include "single_layer_tests/proposal.hpp"
+
+namespace LayerTestsDefinitions {
+
+const normalize_type normalize = true;
+const feat_stride_type feat_stride = 1;
+const box_size_scale_type box_size_scale = 2.0f;
+const box_coordinate_scale_type box_coordinate_scale = 2.0f;
+
+std::string ProposalLayerTest::getTestCaseName(testing::TestParamInfo<proposalLayerTestParamsSet> obj) {
+ proposalSpecificParams proposalParams;
+
+ std::string targetDevice;
+ std::tie(proposalParams, targetDevice) = obj.param;
+
+ base_size_type base_size;
+ pre_nms_topn_type pre_nms_topn;
+ post_nms_topn_type post_nms_topn;
+ nms_thresh_type nms_thresh;
+ min_size_type min_size;
+ ratio_type ratio;
+ scale_type scale;
+ clip_before_nms_type clip_before_nms;
+ clip_after_nms_type clip_after_nms;
+ framework_type framework;
+ std::tie(base_size, pre_nms_topn,
+ post_nms_topn,
+ nms_thresh,
+ min_size,
+ ratio,
+ scale,
+ clip_before_nms,
+ clip_after_nms,
+ framework) = proposalParams;
+
+ std::ostringstream result;
+ result << "base_size=" << base_size << "_";
+ result << "pre_nms_topn=" << pre_nms_topn << "_";
+ result << "post_nms_topn=" << post_nms_topn << "_";
+ result << "nms_thresh=" << nms_thresh << "_";
+ result << "feat_stride=" << feat_stride << "_";
+ result << "min_size=" << min_size << "_";
+ result << "ratio = " << CommonTestUtils::vec2str(ratio) << "_";
+ result << "scale = " << CommonTestUtils::vec2str(scale) << "_";
+ result << "clip_before_nms=" << clip_before_nms << "_";
+ result << "clip_after_nms=" << clip_after_nms << "_";
+ result << "normalize=" << normalize << "_";
+ result << "box_size_scale=" << box_size_scale << "_";
+ result << "box_coordinate_scale=" << box_coordinate_scale << "_";
+ result << "framework=" << framework << "_";
+ result << "targetDevice=" << targetDevice;
+
+ return result.str();
+}
+
+void ProposalLayerTest::SetUp() {
+ proposalSpecificParams proposalParams;
+
+ std::tie(proposalParams, targetDevice) = this->GetParam();
+ base_size_type base_size;
+ pre_nms_topn_type pre_nms_topn;
+ post_nms_topn_type post_nms_topn;
+ nms_thresh_type nms_thresh;
+ min_size_type min_size;
+ ratio_type ratio;
+ scale_type scale;
+ clip_before_nms_type clip_before_nms;
+ clip_after_nms_type clip_after_nms;
+ framework_type framework;
+
+ std::tie(base_size, pre_nms_topn,
+ post_nms_topn,
+ nms_thresh,
+ min_size,
+ ratio,
+ scale,
+ clip_before_nms,
+ clip_after_nms,
+ framework) = proposalParams;
+
+ size_t bottom_w = base_size;
+ size_t bottom_h = base_size;
+ size_t num_anchors = ratio.size() * scale.size();
+
+ std::vector<size_t> scoresShape = {1, 2 * num_anchors, bottom_h, bottom_w};
+ std::vector<size_t> boxesShape = {1, 4 * num_anchors, bottom_h, bottom_w};
+ std::vector<size_t> imageInfoShape = {3};
+
+ auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(InferenceEngine::Precision::FP16);
+ auto params = ngraph::builder::makeParams(ngPrc, {{"scores", scoresShape}, {"boxes", boxesShape}, {"image_info", imageInfoShape}});
+ auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
+
+ auto proposal = std::dynamic_pointer_cast<ngraph::opset1::Proposal>(
+ ngraph::builder::makeProposal(paramOuts[0], paramOuts[1], paramOuts[2], ngPrc,
+ base_size,
+ pre_nms_topn,
+ post_nms_topn,
+ nms_thresh,
+ feat_stride,
+ min_size,
+ ratio,
+ scale,
+ clip_before_nms,
+ clip_after_nms,
+ normalize,
+ box_size_scale,
+ box_coordinate_scale,
+ framework));
+
+ ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(proposal)};
+ function = std::make_shared<ngraph::Function>(results, params, "proposal");
+}
+
+InferenceEngine::Blob::Ptr ProposalLayerTest::GenerateInput(const InferenceEngine::InputInfo &info) const {
+ InferenceEngine::Blob::Ptr blobPtr;
+
+ const std::string name = info.name();
+ if (name == "scores") {
+ blobPtr = FuncTestUtils::createAndFillBlobFloat(info.getTensorDesc(), 1, 0, 1000, 8234231);
+ } else if (name == "boxes") {
+ blobPtr = FuncTestUtils::createAndFillBlobFloatNormalDistribution(info.getTensorDesc(), 0.0f, 0.2f, 7235346);
+ } else if (name == "image_info") {
+ const float image_info[] = {225.0f, 225.0f, 1.0f};
+ blobPtr = FuncTestUtils::createAndFillBlobWithFloatArray(info.getTensorDesc(), image_info, 3);
+ }
+
+ return blobPtr;
+}
+
+// TODO: for validation, reference version is required (#28373)
+void ProposalLayerTest::Validate() {}
+
+TEST_P(ProposalLayerTest, CompareWithRefs) {
+ Run();
+}
+} // namespace LayerTestsDefinitions
std::vector<int32_t> convRefOutData(outElementsCount);
for (size_t i = 0; i < outElementsCount; i++)
convRefOutData[i] = static_cast<int32_t>(refOutData[i]);
- FuncTestUtils::compareRawBuffers(outBlob->cbuffer().as<int32_t *>(), convRefOutData.data(), outElementsCount, outElementsCount);
+ FuncTestUtils::compareRawBuffers(outBlob->cbuffer().as<int32_t *>(), convRefOutData.data(),
+ outElementsCount, outElementsCount, FuncTestUtils::CompareType::ABS_AND_REL);
} else {
- auto thr = FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32);
- FuncTestUtils::compareRawBuffers(outBlob->cbuffer().as<float *>(), refOutData.data(), outElementsCount, outElementsCount, thr);
+ float thr1, thr2;
+ FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32, thr1, thr2);
+ FuncTestUtils::compareRawBuffers(outBlob->cbuffer().as<float *>(), refOutData.data(),
+ outElementsCount, outElementsCount,
+ FuncTestUtils::CompareType::ABS_AND_REL,
+ thr1, thr2);
}
layer.fnPtr.reset();
}
template<InferenceEngine::Precision::ePrecision PRC>
-void inline fill_data_random_float(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k) {
+void inline fill_data_random_float(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k,
+ const int seed = 1) {
using dataType = typename InferenceEngine::PrecisionTrait<PRC>::value_type;
- std::default_random_engine random(1);
+ std::default_random_engine random(seed);
// 1/k is the resolution of the floating point numbers
std::uniform_int_distribution<int32_t> distribution(k * start_from, k * (start_from + range));
}
}
+template<InferenceEngine::Precision::ePrecision PRC>
+void inline fill_data_normal_random_float(InferenceEngine::Blob::Ptr &blob,
+ const float mean,
+ const float stddev,
+ const int seed = 1) {
+ using dataType = typename InferenceEngine::PrecisionTrait<PRC>::value_type;
+ std::default_random_engine random(seed);
+ std::normal_distribution<> normal_d{mean, stddev};
+
+ auto *rawBlobDataPtr = blob->buffer().as<dataType *>();
+ for (size_t i = 0; i < blob->size(); i++) {
+ auto value = static_cast<float>(normal_d(random));
+ if (typeid(dataType) == typeid(typename InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type)) {
+ rawBlobDataPtr[i] = ngraph::float16(value).to_bits();
+ } else {
+ rawBlobDataPtr[i] = value;
+ }
+ }
+}
+
+template<InferenceEngine::Precision::ePrecision PRC>
+void inline fill_data_float_array(InferenceEngine::Blob::Ptr &blob, const float values[], const size_t size) {
+ using dataType = typename InferenceEngine::PrecisionTrait<PRC>::value_type;
+
+ auto *rawBlobDataPtr = blob->buffer().as<dataType *>();
+ for (size_t i = 0; i < std::min(size, blob->size()); i++) {
+ auto value = values[i];
+ if (typeid(dataType) == typeid(typename InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type)) {
+ rawBlobDataPtr[i] = ngraph::float16(value).to_bits();
+ } else {
+ rawBlobDataPtr[i] = value;
+ }
+ }
+}
+
template<>
void inline fill_data_random<InferenceEngine::Precision::FP32>(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k) {
fill_data_random_float<InferenceEngine::Precision::FP32>(blob, range, start_from, k);
#include <gtest/gtest.h>
+#include <string>
+
+#include <ie_data.h>
#include <ie_blob.h>
+#include <ie_common.h>
+#include <ie_preprocess.hpp>
+#include <ie_icnn_network.hpp>
+
+namespace {
+
+bool strContains(const std::string & str, const std::string & substr) {
+ return str.find(substr) != std::string::npos;
+}
+
+bool strDoesnotContain(const std::string & str, const std::string & substr) {
+ (void)strDoesnotContain; // to overcome unused warning
+ return !strContains(str, substr);
+}
+
+} // namespace
+
+#define ASSERT_STR_CONTAINS(str, substr) \
+ ASSERT_PRED2(&strContains, str, substr)
+
+#define ASSERT_STR_DOES_NOT_CONTAIN(str, substr) \
+ ASSERT_PRED2(&strDoesnotContain, str, substr)
+
+#define EXPECT_STR_CONTAINS(str, substr) \
+ EXPECT_PRED2(&strContains, str, substr)
#define ASSERT_BLOB_EQ(lhs, rhs) \
-compare_blob(lhs,rhs)
+ compare_blob(lhs, rhs)
#define ASSERT_DIMS_EQ(lhs, rhs) \
-compare_dims(lhs,rhs)
+ compare_dims(lhs, rhs)
#define ASSERT_DATA_EQ(lhs, rhs) \
-compare_data(lhs,rhs)
+ compare_data(lhs, rhs)
#define ASSERT_PREPROCESS_CHANNEL_EQ(lhs, rhs) \
-compare_preprocess(lhs,rhs)
+ compare_preprocess(lhs, rhs)
#define ASSERT_PREPROCESS_INFO_EQ(lhs, rhs) \
-compare_preprocess_info(lhs,rhs)
+ compare_preprocess_info(lhs, rhs)
#define ASSERT_OUTPUTS_INFO_EQ(lhs, rhs) \
-compare_outputs_info(lhs,rhs)
+ compare_outputs_info(lhs, rhs)
#define ASSERT_INPUTS_INFO_EQ(lhs, rhs) \
-compare_inputs_info(lhs,rhs)
+ compare_inputs_info(lhs, rhs)
#define ASSERT_STRINGEQ(lhs, rhs) \
-compare_cpp_strings(lhs,rhs)
-
-
+ compare_cpp_strings(lhs, rhs)
inline void compare_blob(InferenceEngine::Blob::Ptr lhs, InferenceEngine::Blob::Ptr rhs) {
ASSERT_EQ(lhs.get(), rhs.get());
inline void compare_dims(const InferenceEngine::SizeVector & lhs, const InferenceEngine::SizeVector & rhs) {
ASSERT_EQ(lhs.size(), rhs.size());
- for(int i=0;i<lhs.size();i++) {
+ for (size_t i = 0; i < lhs.size(); i++) {
ASSERT_EQ(lhs[i], rhs[i]);
}
}
inline void compare_preprocess_info(const InferenceEngine::PreProcessInfo & lhs, const InferenceEngine::PreProcessInfo & rhs) {
ASSERT_EQ(lhs.getMeanVariant(), rhs.getMeanVariant());
ASSERT_EQ(lhs.getNumberOfChannels(), rhs.getNumberOfChannels());
- for(int i=0; i < lhs.getNumberOfChannels(); i++) {
+ for (int i = 0; i < lhs.getNumberOfChannels(); i++) {
ASSERT_PREPROCESS_CHANNEL_EQ(*lhs[i].get(), *rhs[i].get());
}
}
}
}
-inline void compare_inputs_info (const InferenceEngine::InputsDataMap & lhs, const InferenceEngine::InputsDataMap & rhs) {
+inline void compare_inputs_info(const InferenceEngine::InputsDataMap & lhs, const InferenceEngine::InputsDataMap & rhs) {
ASSERT_EQ(lhs.size(), rhs.size());
auto i = lhs.begin();
auto j = rhs.begin();
- for (int k =0; k != lhs.size(); k++, i++, j++) {
+ for (int k = 0; k != lhs.size(); k++, i++, j++) {
ASSERT_STREQ(i->first.c_str(), j->first.c_str());
ASSERT_DIMS_EQ(i->second->getTensorDesc().getDims(), j->second->getTensorDesc().getDims());
ASSERT_PREPROCESS_INFO_EQ(i->second->getPreProcess(), j->second->getPreProcess());
#include "common_test_utils/test_constants.hpp"
namespace FuncTestUtils {
-template<typename dType>
-void inline compareRawBuffers(const dType *res, const dType *ref,
- size_t resSize, size_t refSize,
- float max_diff = 0.01, bool printData = false) {
+
+enum CompareType{
+ ABS,
+ REL,
+ ABS_AND_REL // if absolute and relative differences are too high, an exception is thrown
+};
+/**
+ * @brief Checks values of two blobs according to given algorithm and thresholds.
+ * In ABS and REL cases thr1 corresponds to the single threshold,
+ * In ABS_AND_REL case thr1 and thr2 mean absolute and relative threshold
+ *
+ * @tparam dType Type of blob data
+ * @param res Pointer to considered blob
+ * @param ref Pointer to reference blob
+ * @param resSize Size of considered blob
+ * @param refSize Size of reference blob
+ * @param compareType Defines an algorithm of comparison
+ * @param thr1 First threshold of difference
+ * @param thr2 Second threshold of difference
+ * @param printData A flag if data printing is demanded
+ */
+ template<typename dType>
+static void inline compareRawBuffers(const dType *res, const dType *ref,
+ size_t resSize, size_t refSize,
+ CompareType compareType, float thr1 = 0.01, float thr2 = 0.01,
+ bool printData = false) {
if (printData) {
std::cout << "Reference results: " << std::endl;
for (size_t i = 0; i < refSize; i++) {
std::cout << std::endl;
}
- for (size_t i = 0; i < refSize; i++) {
- float absDiff = std::abs(res[i] - ref[i]);
- if (absDiff > max_diff) {
- float relDiff = absDiff / std::max(res[i], ref[i]);
- ASSERT_LT(relDiff, max_diff) << "Relative comparison of values ref: " << ref[i] << " and res: "
- << res[i] << " , index in blobs: " << i << " failed!";
- }
+ switch (compareType) {
+ case CompareType::ABS:
+ for (size_t i = 0; i < refSize; i++) {
+ float absDiff = std::abs(res[i] - ref[i]);
+                ASSERT_LT(absDiff, thr1) << "Absolute comparison of values ref: " << ref[i] << " and res: "
+ << res[i] << " , index in blobs: " << i << " failed!";
+ }
+ break;
+ case CompareType::REL:
+ for (size_t i = 0; i < refSize; i++) {
+ float absDiff = std::abs(res[i] - ref[i]);
+ float relDiff = absDiff / std::max(res[i], ref[i]);
+ ASSERT_LT(relDiff, thr2) << "Relative comparison of values ref: " << ref[i] << " and res: "
+ << res[i] << " , index in blobs: " << i << " failed!";
+ }
+ break;
+ case CompareType::ABS_AND_REL:
+ for (size_t i = 0; i < refSize; i++) {
+ float absDiff = std::abs(res[i] - ref[i]);
+ if (absDiff > thr1) {
+ float relDiff = absDiff / std::max(res[i], ref[i]);
+ ASSERT_LT(relDiff, thr2) << "Comparison of values ref: " << ref[i] << " and res: "
+ << res[i] << " , index in blobs: " << i << " failed!";
+ }
+ }
+ break;
}
}
-
+/**
+ * @brief Checks absolute and relative difference of blob values according to given threshold.
+ *
+ * @tparam dType Type of blob data
+ * @param res Pointer to considered blob
+ * @param ref Pointer to reference blob
+ * @param resSize Size of considered blob
+ * @param refSize Size of reference blob
+ * @param thr Threshold of difference, absolute and relative simultaneously
+ * @param printData Flag if data printing is demanded
+ */
template<typename dType>
-void inline compareRawBuffers(const std::vector<dType *> res, const std::vector<dType *> ref,
+static void inline compareRawBuffers(const dType *res, const dType *ref,
+ size_t resSize, size_t refSize,
+ float thr = 0.01,
+ bool printData = false) {
+ compareRawBuffers(res, ref, resSize, refSize, CompareType::ABS_AND_REL, thr, thr, printData);
+}
+/**
+ * @brief Checks values of two blobs according to given algorithm and thresholds.
+ * In ABS and REL cases thr1 corresponds to the single threshold,
+ * In ABS_AND_REL case thr1 and thr2 mean absolute and relative threshold
+ *
+ * @tparam dType Type of blob data
+ * @param res Vector of considered blob values
+ * @param ref Vector of reference blob values
+ * @param resSizes Vector of sizes of considered blobs
+ * @param refSizes Vector of sizes of reference blobs
+ * @param compareType Defines an algorithm of comparison
+ * @param thr1 First threshold of difference
+ * @param thr2 Second threshold of difference
+ * @param printData A flag if data printing is demanded
+ */
+template<typename dType>
+static void inline compareRawBuffers(const std::vector<dType *> res, const std::vector<dType *> ref,
const std::vector<size_t> &resSizes, const std::vector<size_t> &refSizes,
- float max_diff = 0.01, bool printData = false) {
+ CompareType compareType,
+ float thr1 = 0.01, float thr2 = 0.01, bool printData = false) {
ASSERT_TRUE(res.size() == ref.size()) << "Reference and Results vector have to be same length";
ASSERT_TRUE(res.size() == resSizes.size()) << "Results vector and elements count vector have to be same length";
ASSERT_TRUE(ref.size() == refSizes.size()) << "Reference vector and elements count vector have to be same length";
for (size_t i = 0; i < res.size(); i++) {
if (printData) std::cout << "BEGIN CHECK BUFFER [" << i << "]" << std::endl;
- compareRawBuffers(res[i], ref[i], resSizes[i], refSizes[i], max_diff, printData);
+ compareRawBuffers(res[i], ref[i], resSizes[i], refSizes[i], compareType, thr1, thr2, printData);
if (printData) std::cout << "END CHECK BUFFER [" << i << "]" << std::endl;
}
}
-
+/**
+ * @brief Checks absolute and relative difference of blob values according to given threshold.
+ *
+ * @tparam dType Type of blob data
+ * @param res Vector of considered blob values
+ * @param ref Vector of reference blob values
+ * @param resSizes Vector of sizes of considered blobs
+ * @param refSizes Vector of sizes of reference blobs
+ * @param thr Threshold of difference, absolute and relative simultaneously
+ * @param printData A flag if data printing is demanded
+ */
template<typename dType>
-void inline compareRawBuffers(const std::vector<dType *> res, const std::vector<std::shared_ptr<dType *>> ref,
+static void inline compareRawBuffers(const std::vector<dType *> res, const std::vector<dType *> ref,
+ const std::vector<size_t> &resSizes, const std::vector<size_t> &refSizes,
+ float thr = 0.01, bool printData = false) {
+ compareRawBuffers(res, ref, resSizes, refSizes, CompareType::ABS_AND_REL, thr, thr, printData);
+}
+/**
+ * @brief Checks values of two blobs according to given algorithm and thresholds.
+ * In ABS and REL cases thr1 corresponds to the single threshold,
+ * In ABS_AND_REL case thr1 and thr2 mean absolute and relative threshold
+ *
+ * @tparam dType Type of blob data
+ * @param res Vector of considered blob values
+ * @param ref Vector of reference blob values
+ * @param resSizes Vector of sizes of considered blobs
+ * @param refSizes Vector of sizes of reference blobs
+ * @param compareType Defines an algorithm of comparison
+ * @param thr1 First threshold of difference
+ * @param thr2 Second threshold of difference
+ * @param printData A flag if data printing is demanded
+ */
+template<typename dType>
+static void inline compareRawBuffers(const std::vector<dType *> res, const std::vector<std::shared_ptr<dType *>> ref,
const std::vector<size_t> &resSizes, const std::vector<size_t> &refSizes,
- float max_diff = 0.01, bool printData = false) {
+ CompareType compareType,
+ float thr1 = 0.01, float thr2 = 0.01, bool printData = false) {
ASSERT_TRUE(res.size() == ref.size()) << "Reference and Results vector have to be same length";
ASSERT_TRUE(res.size() == resSizes.size()) << "Results vector and elements count vector have to be same length";
ASSERT_TRUE(ref.size() == refSizes.size()) << "Reference vector and elements count vector have to be same length";
for (size_t i = 0; i < res.size(); i++) {
if (printData) std::cout << "BEGIN CHECK BUFFER [" << i << "]" << std::endl;
- compareRawBuffers(res[i], *ref[i], resSizes[i], refSizes[i], max_diff, printData);
+ compareRawBuffers(res[i], *ref[i], resSizes[i], refSizes[i], compareType, thr1, thr2, printData);
if (printData) std::cout << "END CHECK BUFFER [" << i << "]" << std::endl;
}
}
+/**
+ * @brief Checks absolute and relative difference of blob values according to given threshold.
+ *
+ * @tparam dType Type of blob data
+ * @param res Vector of considered blob values
+ * @param ref Vector of reference blob values
+ * @param resSizes Vector of sizes of considered blobs
+ * @param refSizes Vector of sizes of reference blobs
+ * @param thr Threshold of difference, absolute and relative simultaneously
+ * @param printData A flag if data printing is demanded
+ */
+template<typename dType>
+static void inline compareRawBuffers(const std::vector<dType *> res, const std::vector<std::shared_ptr<dType *>> ref,
+ const std::vector<size_t> &resSizes, const std::vector<size_t> &refSizes,
+ float thr = 0.01, bool printData = false) {
+ compareRawBuffers(res, ref, resSizes, refSizes, CompareType::ABS_AND_REL, thr, thr, printData);
+}
template<InferenceEngine::Precision::ePrecision PRC>
void inline
}
}
-float inline GetComparisonThreshold(InferenceEngine::Precision prc) {
+void inline GetComparisonThreshold(InferenceEngine::Precision prc, float &absoluteThreshold, float &relativeThreshold) {
switch (prc) {
case InferenceEngine::Precision::FP32:
- return 1e-4;
+ absoluteThreshold = relativeThreshold = 1e-4;
+ break;
case InferenceEngine::Precision::FP16:
- return 1e-2;
+ absoluteThreshold = relativeThreshold = 1e-2;
+ break;
case InferenceEngine::Precision::I16:
case InferenceEngine::Precision::I8:
case InferenceEngine::Precision::U8:
- return 1;
+ absoluteThreshold = relativeThreshold = 1;
+ break;
default:
THROW_IE_EXCEPTION << "Unhandled precision " << prc << " passed to the GetComparisonThreshold()";
}
}
+float inline GetComparisonThreshold(InferenceEngine::Precision prc) {
+ float res;
+ GetComparisonThreshold(prc, res, res);
+ return res;
+}
+
// Copy from net_pass.h
template<InferenceEngine::Precision::ePrecision PREC_FROM, InferenceEngine::Precision::ePrecision PREC_TO>
void inline convertArrayPrecision(typename InferenceEngine::PrecisionTrait<PREC_TO>::value_type *dst,
return newBlob;
}
+InferenceEngine::Blob::Ptr inline createAndFillBlobFloatNormalDistribution(const InferenceEngine::TensorDesc &td,
+ const float mean,
+ const float stddev,
+ const int32_t seed = 1) {
+ InferenceEngine::Blob::Ptr blob = make_blob_with_precision(td);
+ blob->allocate();
+ switch (td.getPrecision()) {
+#define CASE(X) case X: CommonTestUtils::fill_data_normal_random_float<X>(blob, mean, stddev, seed); break;
+ CASE(InferenceEngine::Precision::FP32)
+ CASE(InferenceEngine::Precision::FP16)
+ CASE(InferenceEngine::Precision::U8)
+ CASE(InferenceEngine::Precision::U16)
+ CASE(InferenceEngine::Precision::I8)
+ CASE(InferenceEngine::Precision::I16)
+ CASE(InferenceEngine::Precision::I64)
+ CASE(InferenceEngine::Precision::BIN)
+ CASE(InferenceEngine::Precision::I32)
+ CASE(InferenceEngine::Precision::BOOL)
+#undef CASE
+ default:
+ THROW_IE_EXCEPTION << "Wrong precision specified: " << td.getPrecision().name();
+ }
+ return blob;
+}
+
+InferenceEngine::Blob::Ptr inline createAndFillBlobFloat(const InferenceEngine::TensorDesc &td,
+ const uint32_t range = 10,
+ const int32_t start_from = 0,
+ const int32_t resolution = 1,
+ const int32_t seed = 1) {
+ InferenceEngine::Blob::Ptr blob = make_blob_with_precision(td);
+
+ blob->allocate();
+ switch (td.getPrecision()) {
+#define CASE(X) case X: CommonTestUtils::fill_data_random_float<X>(blob, range, start_from, resolution, seed); break;
+ CASE(InferenceEngine::Precision::FP32)
+ CASE(InferenceEngine::Precision::FP16)
+ CASE(InferenceEngine::Precision::U8)
+ CASE(InferenceEngine::Precision::U16)
+ CASE(InferenceEngine::Precision::I8)
+ CASE(InferenceEngine::Precision::I16)
+ CASE(InferenceEngine::Precision::I64)
+ CASE(InferenceEngine::Precision::BIN)
+ CASE(InferenceEngine::Precision::I32)
+ CASE(InferenceEngine::Precision::BOOL)
+#undef CASE
+ default:
+ THROW_IE_EXCEPTION << "Wrong precision specified: " << td.getPrecision().name();
+ }
+ return blob;
+}
+
+InferenceEngine::Blob::Ptr inline createAndFillBlobWithFloatArray(const InferenceEngine::TensorDesc &td,
+ const float values[],
+ const int size) {
+ InferenceEngine::Blob::Ptr blob = make_blob_with_precision(td);
+ blob->allocate();
+ switch (td.getPrecision()) {
+#define CASE(X) case X: CommonTestUtils::fill_data_float_array<X>(blob, values, size); break;
+ CASE(InferenceEngine::Precision::FP32)
+ CASE(InferenceEngine::Precision::FP16)
+ CASE(InferenceEngine::Precision::U8)
+ CASE(InferenceEngine::Precision::U16)
+ CASE(InferenceEngine::Precision::I8)
+ CASE(InferenceEngine::Precision::I16)
+ CASE(InferenceEngine::Precision::I64)
+ CASE(InferenceEngine::Precision::BIN)
+ CASE(InferenceEngine::Precision::I32)
+ CASE(InferenceEngine::Precision::BOOL)
+#undef CASE
+ default:
+ THROW_IE_EXCEPTION << "Wrong precision specified: " << td.getPrecision().name();
+ }
+ return blob;
+}
+
InferenceEngine::Blob::Ptr inline createAndFillBlob(const InferenceEngine::TensorDesc &td,
const uint32_t range = 10,
const int32_t start_from = 0,
cnnNetwork = InferenceEngine::CNNNetwork{function};
ConfigureNetwork();
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice);
+}
+
+void LayerTestsCommon::Infer() {
inferRequest = executableNetwork.CreateInferRequest();
for (const auto &input : cnnNetwork.getInputsInfo()) {
inferRequest.SetBlob(info->name(), blob);
inputs.push_back(blob);
}
-}
-
-void LayerTestsCommon::Infer() {
inferRequest.Infer();
}
return outputs;
}
+void LayerTestsCommon::Compare(const std::vector<std::vector<std::uint8_t>>& expectedOutputs, const std::vector<InferenceEngine::Blob::Ptr>& actualOutputs) {
+ for (std::size_t outputIndex = 0; outputIndex < expectedOutputs.size(); ++outputIndex) {
+ const auto &expected = expectedOutputs[outputIndex];
+ const auto &actual = actualOutputs[outputIndex];
+ Compare(expected, actual);
+ }
+}
+
void LayerTestsCommon::Validate() {
// nGraph interpreter does not support f16
// IE converts f16 to f32
IE_ASSERT(actualOutputs.size() == expectedOutputs.size())
<< "nGraph interpreter has " << expectedOutputs.size() << " outputs, while IE " << actualOutputs.size();
- for (std::size_t outputIndex = 0; outputIndex < expectedOutputs.size(); ++outputIndex) {
- const auto &expected = expectedOutputs[outputIndex];
- const auto &actual = actualOutputs[outputIndex];
- Compare(expected, actual);
- }
+ Compare(expectedOutputs, actualOutputs);
}
void LayerTestsCommon::SetRefMode(RefMode mode) {
// Run ngraph Interpreter backend to calculate references
auto refOutData = ngraph::helpers::inferFnWithInterp<ngraph::element::Type_t::f32>(fnPtr, inRawData);
// Compare IE infer results vs ngraph Interpreter reference results
- auto thr = FuncTestUtils::GetComparisonThreshold(netPrecision);
- FuncTestUtils::compareRawBuffers(outBlobsRawData, refOutData, outElementsCount, outElementsCount, thr);
-
+ float thr1, thr2;
+ FuncTestUtils::GetComparisonThreshold(netPrecision, thr1, thr2);
+ FuncTestUtils::compareRawBuffers(outBlobsRawData, refOutData, outElementsCount, outElementsCount,
+ FuncTestUtils::CompareType::ABS_AND_REL,
+ thr1, thr2);
// Deallocate ngraph::Function pointer
fnPtr.reset();
if (targetDevice.find(CommonTestUtils::DEVICE_GPU) != std::string::npos) {
virtual void Run();
+ virtual void Compare(const std::vector<std::vector<std::uint8_t>>& expectedOutputs, const std::vector<InferenceEngine::Blob::Ptr>& actualOutputs);
+
virtual void Compare(const std::vector<std::uint8_t> &expected, const InferenceEngine::Blob::Ptr &actual);
virtual void SetRefMode(RefMode mode);
return refMode;
}
+ void ConfigurePlugin() const;
+
+ void LoadNetwork();
+
TargetDevice targetDevice;
std::shared_ptr<ngraph::Function> function;
std::map<std::string, std::string> configuration;
InferenceEngine::Precision outPrc = InferenceEngine::Precision::UNSPECIFIED;
InferenceEngine::ExecutableNetwork executableNetwork;
-private:
- void ConfigurePlugin() const;
+ virtual void Validate();
+private:
void ConfigureNetwork() const;
- void LoadNetwork();
-
void Infer();
std::vector<InferenceEngine::Blob::Ptr> GetOutputs();
- void Validate();
-
InferenceEngine::Core *core = nullptr;
InferenceEngine::CNNNetwork cnnNetwork;
InferenceEngine::InferRequest inferRequest;
class MockInferencePluginInternal2 : public InferenceEngine::InferencePluginInternal {
public:
- MOCK_METHOD3(LoadExeNetworkImpl, std::shared_ptr<InferenceEngine::ExecutableNetworkInternal>(
- const InferenceEngine::ICore *, const InferenceEngine::ICNNNetwork &, const std::map<std::string, std::string> &));
+ MOCK_METHOD2(LoadExeNetworkImpl, std::shared_ptr<InferenceEngine::ExecutableNetworkInternal>(
+ const InferenceEngine::ICNNNetwork &, const std::map<std::string, std::string> &));
MOCK_METHOD3(LoadNetwork, void(
InferenceEngine::IExecutableNetwork::Ptr &,
const InferenceEngine::ICNNNetwork &,
class MockInferencePluginInternal : public InferenceEngine::InferencePluginInternal {
public:
- MOCK_METHOD3(LoadExeNetworkImpl, std::shared_ptr<InferenceEngine::ExecutableNetworkInternal>(
- const InferenceEngine::ICore *, const InferenceEngine::ICNNNetwork &, const std::map<std::string, std::string> &));
+ MOCK_METHOD2(LoadExeNetworkImpl, std::shared_ptr<InferenceEngine::ExecutableNetworkInternal>(
+ const InferenceEngine::ICNNNetwork &, const std::map<std::string, std::string> &));
MOCK_METHOD1(AddExtension, void(InferenceEngine::IExtensionPtr ext_ptr));
MOCK_METHOD1(SetConfig, void(const std::map <std::string, std::string> &));
class MockInferencePluginInternal3 : public InferenceEngine::InferencePluginInternal {
public:
- MOCK_METHOD3(LoadExeNetworkImpl, std::shared_ptr<InferenceEngine::ExecutableNetworkInternal>(
- const InferenceEngine::ICore *, const InferenceEngine::ICNNNetwork &, const std::map<std::string, std::string> &));
+ MOCK_METHOD2(LoadExeNetworkImpl, std::shared_ptr<InferenceEngine::ExecutableNetworkInternal>(
+ const InferenceEngine::ICNNNetwork &, const std::map<std::string, std::string> &));
MOCK_METHOD1(AddExtension, void(InferenceEngine::IExtensionPtr ext_ptr));
MOCK_METHOD1(SetConfig, void(const std::map <std::string, std::string> &));
};
InferenceEngine::ExecutableNetwork ImportNetwork(const std::istream&, const std::map<std::string, std::string> &) {return {};}
MOCK_QUALIFIED_METHOD0(GetName, const noexcept, std::string(void));
MOCK_QUALIFIED_METHOD1(SetName, noexcept, void(const std::string &));
- MOCK_QUALIFIED_METHOD0(GetCore, const noexcept, const InferenceEngine::ICore*(void));
+ MOCK_QUALIFIED_METHOD0(GetCore, const noexcept, InferenceEngine::ICore*(void));
MOCK_QUALIFIED_METHOD1(SetCore, noexcept, void(InferenceEngine::ICore*));
MOCK_CONST_METHOD2(GetConfig, InferenceEngine::Parameter(const std::string& name,
namespace builder {
ngraph::ParameterVector makeParams(const element::Type &type, const std::vector<std::vector<size_t>> &shapes);
+ngraph::ParameterVector makeParams(const element::Type &type, const std::vector<std::pair<std::string, std::vector<size_t>>> &inputs);
std::shared_ptr<ngraph::Node> makeConstant(const element::Type &type, const std::vector<size_t> &shape,
const std::vector<float> &data, bool random = false);
std::shared_ptr<ngraph::Node> makeUnsqueeze(const ngraph::Output<Node> &in,
const element::Type &type,
const std::vector<size_t> &squeeze_indices);
+
+std::shared_ptr<ngraph::Node> makeProposal(const ngraph::Output<Node> &class_probs,
+ const ngraph::Output<Node> &class_logits,
+ const ngraph::Output<Node> &image_shape,
+ const element::Type &type,
+ size_t base_size,
+ size_t pre_nms_topn,
+ size_t post_nms_topn,
+ float nms_thresh,
+ size_t feat_stride,
+ size_t min_size,
+ const std::vector<float> &ratio,
+ const std::vector<float> &scale,
+ bool clip_before_nms,
+ bool clip_after_nms,
+ bool normalize,
+ float box_size_scale,
+ float box_coordinate_scale,
+ std::string framework);
} // namespace builder
} // namespace ngraph
auto paramNode = std::make_shared<ngraph::opset1::Parameter>(type, ngraph::Shape(shape));
outs.push_back(paramNode);
}
+
+ return outs;
+}
+
+ngraph::ParameterVector makeParams(const element::Type &type, const std::vector<std::pair<std::string, std::vector<size_t>>> &inputs) {
+ ngraph::ParameterVector outs;
+ for (const auto &input : inputs) {
+ const auto &name = input.first;
+ const auto &shape = input.second;
+ auto paramNode = std::make_shared<ngraph::opset1::Parameter>(type, ngraph::Shape(shape));
+ paramNode->set_friendly_name(name);
+ outs.push_back(paramNode);
+ }
+
return outs;
}
+
} // namespace builder
} // namespace ngraph
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+//
+
+#include <vector>
+#include <memory>
+
+#include "ngraph_functions/builders.hpp"
+
+namespace ngraph {
+namespace builder {
+
+std::shared_ptr<Node> makeProposal(const ngraph::Output<Node> &class_probs,
+ const ngraph::Output<Node> &class_logits,
+ const ngraph::Output<Node> &image_shape,
+ const element::Type &type,
+ size_t base_size,
+ size_t pre_nms_topn,
+ size_t post_nms_topn,
+ float nms_thresh,
+ size_t feat_stride,
+ size_t min_size,
+ const std::vector<float> &ratio,
+ const std::vector<float> &scale,
+ bool clip_before_nms,
+ bool clip_after_nms,
+ bool normalize,
+ float box_size_scale,
+ float box_coordinate_scale,
+ std::string framework) {
+ ngraph::op::ProposalAttrs attrs;
+ attrs.base_size = base_size;
+ attrs.pre_nms_topn = pre_nms_topn;
+ attrs.post_nms_topn = post_nms_topn;
+ attrs.nms_thresh = nms_thresh;
+ attrs.feat_stride = feat_stride;
+ attrs.min_size = min_size;
+ attrs.ratio = ratio;
+ attrs.scale = scale;
+ attrs.clip_before_nms = clip_before_nms;
+ attrs.clip_after_nms = clip_after_nms;
+ attrs.normalize = normalize;
+ attrs.box_size_scale = box_size_scale;
+ attrs.box_coordinate_scale = box_coordinate_scale;
+ attrs.framework = framework;
+
+ return std::make_shared<opset1::Proposal>(class_probs, class_logits, image_shape, attrs);
+}
+
+} // namespace builder
+} // namespace ngraph
auto outputTensors = std::vector<std::shared_ptr<runtime::Tensor>>{};
const auto &results = function->get_results();
- std::transform(results.cbegin(), results.cend(), std::back_inserter(outputTensors),
- [&backend](const std::shared_ptr<op::Result> &result) {
- return backend->create_tensor(result->get_element_type(), result->get_shape());
- });
+ for (size_t i = 0; i <results.size(); ++i) {
+ outputTensors.push_back(std::make_shared<HostTensor>());
+ }
auto handle = backend->compile(function);
handle->call_with_validate(outputTensors, inputTensors);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "graph_transformer_tests.hpp"
+#include <vpu/stages/mx_stage.hpp>
+#include <vpu/middleend/hw/utility.hpp>
+
+using namespace vpu;
+
+class VPU_SplitLargeKernelConvTest : public GraphTransformerTest {
+ protected:
+ PassSet pipeline;
+ Model model;
+
+ public:
+ void InitConvStage(
+ int inputX = 8960,
+ int inputY = 1,
+ bool isOutput4D = true) {
+
+ int kernelx = 16;
+ int kernely = 1;
+ int kernelStrideX = 1;
+ int kernelStrideY = 1;
+ int dilationX = 1;
+ int dilationY = 1;
+ int padx_begin = 7;
+ int pady_begin = 0;
+ int padx_end = 8;
+ int pady_end = 0;
+ model = CreateModel();
+
+ auto input = model->addInputData(
+ "Input",
+ DataDesc(DataType::FP16, DimsOrder::NCHW, {inputX, inputY, 8, 1}));
+ model->attrs().set<int>("numInputs", 1);
+
+ Data output;
+ if (isOutput4D) {
+ output = model->addOutputData(
+ "Output",
+ DataDesc(DataType::FP16,
+ DimsOrder::NCHW,
+ {(inputX + padx_begin + padx_end - kernelx) / kernelStrideX + 1,
+ (inputY + pady_begin + pady_end - kernely) / kernelStrideY + 1, 8, 1}));
+ } else {
+ output = model->addOutputData(
+ "Output",
+ DataDesc(DataType::FP16,
+ DimsOrder::CHW,
+ {(inputX + padx_begin + padx_end - kernelx) / kernelStrideX + 1,
+ (inputY + pady_begin + pady_end - kernely) / kernelStrideY + 1, 8}));
+ }
+
+ auto conv = std::make_shared<ie::ConvolutionLayer>(ie::LayerParams{"conv", "Convolution", ie::Precision::FP16});
+ conv->_kernel_x = kernelx;
+ conv->_kernel_y = kernely;
+ conv->_stride_x = kernelStrideX;
+ conv->_stride_y = kernelStrideY;
+ conv->_dilation_x = dilationX;
+ conv->_dilation_x = dilationY;
+
+ conv->_padding.insert(0, padx_begin);
+ conv->_padding.insert(1, pady_begin);
+ conv->_pads_end.insert(0, padx_end);
+ conv->_pads_end.insert(1, pady_end);
+ conv->_auto_pad = "same_upper";
+
+ conv->_weights = ie::make_shared_blob<short>({ ie::Precision::FP16, {static_cast<size_t>(kernelx * kernely * 8 * 8)}, ie::Layout::C });
+ conv->_weights->allocate();
+
+ frontEnd->parseConvolution(model, conv, {input}, {output});
+
+ pipeline.addPass(passManager->dumpModel("initial"));
+
+ pipeline.addPass(passManager->hwPadding());
+ pipeline.addPass(passManager->dumpModel("hwPadding"));
+
+        // if the large kernel conv is converted to a conv that can run on HW, then hwConvTiling will succeed; if not, an exception is thrown
+ pipeline.addPass(passManager->splitLargeKernelConv());
+ pipeline.addPass(passManager->dumpModel("splitLargeKernelConv"));
+
+ pipeline.addPass(passManager->hwConvTiling());
+ pipeline.addPass(passManager->dumpModel("hwConvTiling"));
+
+ pipeline.addPass(passManager->adjustDataLayout());
+ pipeline.addPass(passManager->dumpModel("adjustDataLayout"));
+
+ pipeline.addPass(passManager->processSpecialStages());
+ pipeline.addPass(passManager->dumpModel("processSpecialStages"));
+
+ pipeline.addPass(passManager->adjustDataLocation());
+ pipeline.addPass(passManager->dumpModel("adjustDataLocation"));
+
+ pipeline.addPass(passManager->finalCheck());
+ }
+};
+
+TEST_F(VPU_SplitLargeKernelConvTest, splitLargeKernelConvIfKernelSizeIs1x16) {
+ InitCompileEnv();
+ InitConvStage();
+
+ ASSERT_NO_THROW(pipeline.run(model));
+}
\ No newline at end of file
unitTestUtils
ADD_CPPLINT
LABELS
- INFERENCE_ENGINE
+ IE
)
\ No newline at end of file
_testModel = CreateTestModel();
}
+ void checkShapeConnection(const Data& parent, const Data& child) {
+ ASSERT_NE(child->parentDataToShapeEdge(), nullptr);
+ ASSERT_EQ(child->childDataToShapeEdges().size(), 0);
+ const auto& parentDataToShapeEdge = child->parentDataToShapeEdge();
+ ASSERT_EQ(parentDataToShapeEdge->parent(), parent);
+ ASSERT_EQ(parentDataToShapeEdge->child(), child);
+
+ ASSERT_EQ(parent->parentDataToShapeEdge(), nullptr);
+
+ const auto& childDataToShapeEdges = parent->childDataToShapeEdges();
+
+ const auto& it = std::find(childDataToShapeEdges.begin(), childDataToShapeEdges.end(), parentDataToShapeEdge);
+ ASSERT_NE(it, childDataToShapeEdges.end());
+ }
+
ie::CNNLayerPtr createDSRLayer() {
return std::make_shared<ie::CNNLayer>(ie::LayerParams{"DSR", "DynamicShapeResolver", ie::Precision::I32});
}
{inputStage->output(0), inputStage->output(1)}, _testModel.getOutputs()));
}
-class DSRParsingFromNgraphTests : public DSRParsingTests {
-protected:
- void checkShapeConnection(const Data& parent, const Data& child) {
- ASSERT_NE(child->parentDataToShapeEdge(), nullptr);
- ASSERT_EQ(child->childDataToShapeEdges().size(), 0);
- const auto& parentDataToShapeEdge = child->parentDataToShapeEdge();
- ASSERT_EQ(parentDataToShapeEdge->parent(), parent);
- ASSERT_EQ(parentDataToShapeEdge->child(), child);
+TEST_F(DSRParsingTests, DSRParserPreservesConnectionsOnOutputDSR) {
+ _testModel.createInputs({_dataDesc});
+ _testModel.createOutputs({_dataDesc});
- ASSERT_EQ(parent->parentDataToShapeEdge(), nullptr);
+ const auto& model = _testModel.getBaseModel();
- const auto& childDataToShapeEdges = parent->childDataToShapeEdges();
- ASSERT_EQ(childDataToShapeEdges.size(), 1);
+ const auto& inputStage = _testModel.addStage({InputInfo::fromNetwork(0)},
+ {OutputInfo::intermediate(_dataDesc), OutputInfo::intermediate(_correstShapeDesc)});
- ASSERT_EQ(childDataToShapeEdges.front(), parentDataToShapeEdge);
- }
-};
+ model->connectDataWithShape(inputStage->output(1), inputStage->output(0));
+
+ checkShapeConnection(inputStage->output(1), inputStage->output(0));
+
+ const auto& outputStage = _testModel.addStage({InputInfo::fromPrevStage(0)},
+ {OutputInfo::intermediate(_dataDesc)});
+
+ const auto& dsrLayer = createDSRLayer();
+
+ ASSERT_NO_THROW(frontEnd->parseDSR(_testModel.getBaseModel(), dsrLayer,
+ {outputStage->output(0), inputStage->output(1)}, _testModel.getOutputs()));
+
+ checkShapeConnection(inputStage->output(1), inputStage->output(0));
+ checkShapeConnection(inputStage->output(1), outputStage->output(0));
+}
-TEST_F(DSRParsingFromNgraphTests, DSRParserCreatesTwoOutputsOnOutputDSR) {
+typedef DSRParsingTests DSRParsingFromNgraphTests;
+
+TEST_F(DSRParsingFromNgraphTests, DSRParserCreatesAndConnectsTwoOutputsOnOutputDSR) {
const auto& inPrecision = ::ngraph::element::Type(::ngraph::element::Type_t::i32);
const auto& tensor = std::make_shared<ngraph::opset3::Parameter>(inPrecision, ngraph::Shape{1, 800});
}
}
ASSERT_EQ(numOutputs, 2);
+
+ const auto& it = std::find_if(model->getStages().begin(), model->getStages().end(), [](const Stage& stage) {
+ return stage->type() == StageType::NonZero;
+ });
+
+ ASSERT_NE(it, model->getStages().end());
+ const auto& nonZeroStage = *it;
+
+ checkShapeConnection(nonZeroStage->output(1), nonZeroStage->output(0));
}
TEST_F(DSRParsingFromNgraphTests, DSRWithSingleProducerCreatesConnectionBetweenDataAndShape) {
ModelPtr model;
ASSERT_NO_THROW(model = frontEnd->buildInitialModel(cnnNet));
- Stage nonZeroStage = nullptr;
+ const auto& it = std::find_if(model->getStages().begin(), model->getStages().end(), [](const Stage& stage) {
+ return stage->type() == StageType::NonZero;
+ });
- for (const auto& stage : model->getStages()) {
- if (stage->type() != StageType::NonZero) {
- continue;
- }
- nonZeroStage = stage;
- }
-
- ASSERT_NE(nonZeroStage, nullptr);
+ ASSERT_NE(it, model->getStages().end());
+ const auto& nonZeroStage = *it;
checkShapeConnection(nonZeroStage->output(1), nonZeroStage->output(0));
}
ModelPtr model;
ASSERT_NO_THROW(model = frontEnd->buildInitialModel(cnnNet));
- Stage nonZeroStage = nullptr;
-
- for (const auto& stage : model->getStages()) {
- if (stage->type() != StageType::NonZero) {
- continue;
- }
- nonZeroStage = stage;
- }
+ const auto& it = std::find_if(model->getStages().begin(), model->getStages().end(), [](const Stage& stage) {
+ return stage->type() == StageType::NonZero;
+ });
- ASSERT_NE(nonZeroStage, nullptr);
+ ASSERT_NE(it, model->getStages().end());
+ const auto& nonZeroStage = *it;
const auto& stageReluData = nonZeroStage->output(0)->singleConsumer();
const auto& stageReluShape = nonZeroStage->output(1)->singleConsumer();
PassSet::Ptr _middleEnd = nullptr;
};
-TEST_F(StageDependencyEdgeProcessingTests, AddStageDependencyAssertsOnOutputData) {
+TEST_F(StageDependencyEdgeProcessingTests, AddStageDependencyDoesNotAssertOnOutputData) {
//
// -> [Data] -> (Stage) -> [Output]
// [Input] -> (Stage) |
auto model = _testModel.getBaseModel();
- ASSERT_ANY_THROW(model->addStageDependency(dependentStage, dependencyProducer->output(0)));
+ ASSERT_NO_THROW(model->addStageDependency(dependentStage, dependencyProducer->output(0)));
}
TEST_F(StageDependencyEdgeProcessingTests, NetWithTwoStagesHasCorrectExecOrder) {
#include "holders_tests.hpp"
-INSTANTIATE_TEST_CASE_P(ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
+INSTANTIATE_TEST_CASE_P(smoke_ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
// 0 - plugin
// 1 - executable_network
// 2 - infer_request
#include "behavior_test_plugins.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
ValuesIn(supportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
ValuesIn(supportedValues),
getTestCaseName);
// Disabled due to a bug on CentOS that leads to segmentation fault of application on exit
// when perf counters are enabled
//INSTANTIATE_TEST_CASE_P(
-// BehaviorTest,
+// smoke_BehaviorTest,
// BehaviorPluginTestExecGraphInfo,
// ValuesIn(supportedValues),
#include "behavior_test_plugin_infer_request.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
#include "behavior_test_plugin_infer_request_callback.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request_config.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
getConfigTestCaseName);
#include "behavior_test_plugin_infer_request_input.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request_output.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
// FIXME
//#if (defined INSTANTIATE_TESTS)
-//INSTANTIATE_TEST_CASE_P(BehaviorTest, MemoryLayerTest,
+//INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, MemoryLayerTest,
// ::testing::ValuesIn(memory_test_cases),
// getTestName<memory_test_params>);
//#endif
layout_test_params("GPU", "FP32", Layout::NCHW, power_params({ { 1, 3, 16, 16 } }, 1, 2, 2)),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanLoadPower,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanLoadPower,
::testing::ValuesIn(power_test_cases), getTestName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanLoadConv,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanLoadConv,
::testing::ValuesIn(conv_test_cases), getTestName);
// Disabled due to a bug on CentOS that leads to segmentation fault of application on exit
// when perf counters are enabled
//INSTANTIATE_TEST_CASE_P(
-// BehaviorTest,
+// smoke_BehaviorTest,
// BehaviorPluginTestPerfCounters,
// ValuesIn(supportedValues),
#include "behavior_test_plugin_set_preprocess.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest,
BehaviorPluginTestPreProcess,
ValuesIn(supportedValues),
getTestCaseName);
#include "behavior_test_plugin_unsupported.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_version.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
FuncTestUtils::TestModel::convReluNormPoolFcModelFP32.model_xml_str, \
FuncTestUtils::TestModel::convReluNormPoolFcModelFP32.weights_blob, \
Precision::FP32)
+// for multi-device we are testing the fp16 (as it is supported by all device combos we are considering for testing,
+// e.g. GPU and VPU); for CPU the network is automatically (internally) converted to fp32.
+const std::map<std::string, std::string> multi_device_conf = {{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"}};
+#define BEH_MULTI BehTestParams("MULTI", \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.model_xml_str, \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.weights_blob, \
+ Precision::FP32, \
+ multi_device_conf)
// all parameters are unsupported - reversed
#define BEH_US_ALL_CLDNN BehTestParams("GPU", \
FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.model_xml_str, \
FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.weights_blob, \
Precision::Q78)
+#define BEH_US_ALL_MULTI BehTestParams("MULTI", \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.model_xml_str, \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.weights_blob, \
+ Precision::Q78, \
+ multi_device_conf)
const BehTestParams supportedValues[] = {
BEH_CLDNN,
+ BEH_MULTI,
};
const BehTestParams requestsSupportedValues[] = {
BEH_CLDNN,
+ BEH_MULTI,
};
const BehTestParams allInputSupportedValues[] = {
BEH_CLDNN.withIn(Precision::FP16).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO}}),
BEH_CLDNN.withIn(Precision::I16).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO}}),
BEH_CLDNN.withIn(Precision::I32).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI, BEH_MULTI.withIn(Precision::FP16), BEH_MULTI.withIn(Precision::U8), BEH_MULTI.withIn(Precision::I16),
+ BEH_MULTI.withIn(Precision::I32),
+ BEH_MULTI.withIn(Precision::U8).withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI.withIn(Precision::FP16).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO},
+ {MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"}}),
+ BEH_MULTI.withIn(Precision::I16).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO},
+ {MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"}}),
+ BEH_MULTI.withIn(Precision::I32).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO},
+ {MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"}}),
};
const BehTestParams allOutputSupportedValues[] = {
BEH_CLDNN, BEH_CLDNN.withOut(Precision::FP16),
BEH_CLDNN.withIn(Precision::FP16).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI, BEH_MULTI.withOut(Precision::FP16),
+ BEH_MULTI.withIn(Precision::FP16).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO},
+ {MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"}}),
};
const BehTestParams typeUnSupportedValues[] = {
BEH_CLDNN.withIn(Precision::Q78), BEH_CLDNN.withIn(Precision::I8),
+ BEH_MULTI.withIn(Precision::Q78), BEH_MULTI.withIn(Precision::I8),
};
const BehTestParams allUnSupportedValues[] = {
BEH_US_ALL_CLDNN,
+ BEH_US_ALL_MULTI,
};
const std::vector<BehTestParams> withCorrectConfValues = {
BEH_CLDNN.withConfig({{KEY_TUNING_MODE, TUNING_CREATE},
{KEY_TUNING_FILE, "tfile"}}),
BEH_CLDNN.withConfig({{KEY_DEVICE_ID, "0"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_GPU_THROUGHPUT_STREAMS, "2"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_PERF_COUNT, NO}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_DUMP_KERNELS, NO}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_DUMP_KERNELS, YES}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_TUNING_MODE, TUNING_DISABLED}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_TUNING_MODE, TUNING_CREATE},
+ {KEY_TUNING_FILE, "tfile"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_DEVICE_ID, "0"}}),
};
const BehTestParams withIncorrectConfValues[] = {
// FIXME: [IE clDNN] The plugin doesn't throw GENERAL_ERROR if use non-exist tuning file. CVS-8593
//BEH_CLDNN.withConfig({ { KEY_TUNING_MODE, TUNING_USE_EXISTING },
// { KEY_TUNING_FILE, "unknown_file" } }),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_GPU_THROUGHPUT_STREAMS, "OFF"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_PERF_COUNT, "ON"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_CONFIG_FILE, "unknown_file"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_DUMP_KERNELS, "ON"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_TUNING_MODE, "TUNING_UNKNOWN_MODE"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_DEVICE_ID, "DEVICE_UNKNOWN"}})
};
const std::vector<BehTestParams> withCorrectConfValuesNetworkOnly = {
const BehTestParams withIncorrectConfKeys[] = {
BEH_CLDNN.withIncorrectConfigItem(),
+ BEH_MULTI.withIncorrectConfigItem(),
};
#include "holders_tests.hpp"
-INSTANTIATE_TEST_CASE_P(ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
+INSTANTIATE_TEST_CASE_P(smoke_ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
// 0 - plugin
// 1 - executable_network
// 2 - infer_request
#include "behavior_test_plugins.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
#include "behavior_test_plugin_config.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
ValuesIn(withIncorrectConfKeys),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
ValuesIn(supportedValues),
getTestCaseName);
#include "gna_test_data.hpp"
INSTANTIATE_TEST_CASE_P(
- BehaviorTest,
+ smoke_BehaviorTest,
BehaviorPluginTestExecGraphInfo,
ValuesIn(supportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
#include "gna_test_data.hpp"
// TODO: support InferRequestCallback in GNAPlugin
-//INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
+//INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
// getTestCaseName);
#include "behavior_test_plugin_infer_request_config.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestConfig,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestConfig,
ValuesIn(withCorrectConfValues),
getConfigTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
getConfigTestCaseName);
bool CheckGnaHw() {
}
}
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestWithGnaHw,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestWithGnaHw,
ValuesIn(withGnaHwConfValue),
getConfigTestCaseName);
#include "behavior_test_plugin_infer_request_input.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request_output.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
conv_test_params(CommonTestUtils::DEVICE_GNA, conv_case)
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, DeconvolutionLayerTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, DeconvolutionLayerTest,
::testing::ValuesIn(deconv_test_cases),
getTestName<conv_test_params>);
};
// TODO: fix this
-//INSTANTIATE_TEST_CASE_P(BehaviorTest, PoolingLayerTest,
+//INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, PoolingLayerTest,
// ::testing::Values(pool_test_params("GNAPlugin", "FP32", pool_case)),
// getTestName<pool_test_params>);
//
-//INSTANTIATE_TEST_CASE_P(BehaviorTest, ReLULayerTest,
+//INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, ReLULayerTest,
// ::testing::Values(activ_test_params("GNAPlugin", "FP32", activation_case)),
// getTestName<activ_test_params>);
// FIXME
//#if (defined INSTANTIATE_TESTS)
-//INSTANTIATE_TEST_CASE_P(BehaviorTest, MemoryLayerTest,
+//INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, MemoryLayerTest,
// ::testing::ValuesIn(memory_test_cases),
// getTestName<memory_test_params>);
//#endif
layout_test_params(CommonTestUtils::DEVICE_GNA, "FP32", Layout::NCHW, power_params({ { 1, 3, 16, 16 } }, 2, 2, 2)),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanLoadActiv,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanLoadActiv,
::testing::ValuesIn(activ_test_cases), getTestName);
#include "behavior_test_plugin_unsupported.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
getTestCaseName);
- INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestBatchUnsupported, ValuesIn(batchUnSupportedValues),
+ INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestBatchUnsupported, ValuesIn(batchUnSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_version.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
#include "holders_tests.hpp"
-INSTANTIATE_TEST_CASE_P(ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
+INSTANTIATE_TEST_CASE_P(smoke_ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
// 0 - plugin
// 1 - executable_network
// 2 - infer_request
#include "behavior_test_plugins.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
#include "behavior_test_plugin_config.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginCorrectConfigTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginCorrectConfigTest,
ValuesIn(BehTestParams::concat(withCorrectConfValues, withCorrectConfValuesPluginOnly)),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
ValuesIn(withIncorrectConfKeys),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
ValuesIn(supportedValues),
getTestCaseName);
#include "mkldnn_test_data.hpp"
INSTANTIATE_TEST_CASE_P(
- BehaviorTest,
+ smoke_BehaviorTest,
BehaviorPluginTestExecGraphInfo,
ValuesIn(supportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
#include "behavior_test_plugin_infer_request_callback.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request_config.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestConfig,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestConfig,
ValuesIn(BehTestParams::concat(withCorrectConfValues, withCorrectConfValuesNetworkOnly)),
getConfigTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
getConfigTestCaseName);
#include "behavior_test_plugin_infer_request_input.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request_output.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
pool_test_params(CommonTestUtils::DEVICE_CPU, "FP32", pool_case),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, ROIPoolingLayerTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, ROIPoolingLayerTest,
::testing::ValuesIn(roi_pool_test_cases),
getTestName<pool_test_params>);
activ_test_params(CommonTestUtils::DEVICE_CPU, "FP32", activation_case),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, ActivationLayerTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, ActivationLayerTest,
::testing::ValuesIn(activ_test_cases),
getTestName<activ_test_params>);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, ReLULayerTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, ReLULayerTest,
::testing::Values(activ_test_params("CPU", "FP32", activation_case)),
getTestName<activ_test_params>);
norm_test_params(CommonTestUtils::DEVICE_CPU, "FP32", norm_case),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, NormalizeLayerTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, NormalizeLayerTest,
::testing::ValuesIn(norm_test_cases),
getTestName<norm_test_params>);
layout_test_params(CommonTestUtils::DEVICE_CPU, "FP32", Layout::CHW, power_params({ { 3, 32, 16 } }, 2, 2, 2)),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanLoadPower,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanLoadPower,
::testing::ValuesIn(power_test_cases), getTestName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanLoadConv,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanLoadConv,
::testing::ValuesIn(conv_test_cases), getTestName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanNotLoadConv,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanNotLoadConv,
::testing::ValuesIn(conv_neg_test_cases), getTestName);
#include "behavior_test_plugin_set_preprocess.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest,
BehaviorPluginTestPreProcess,
ValuesIn(requestsSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_unsupported.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_version.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
FuncTestUtils::TestModel::convReluNormPoolFcModelFP32.model_xml_str, \
FuncTestUtils::TestModel::convReluNormPoolFcModelFP32.weights_blob, \
Precision::FP32)
+// for multi-device we are testing the fp16 (as it is supported by all device combos we are considering for testing,
+// e.g. GPU and VPU); for CPU the network is automatically (internally) converted to fp32.
+// Yet the input precision FP16 is not supported by the CPU yet
+const std::map<std::string, std::string> multi_device_conf = {{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"}};
+#define BEH_MULTI BehTestParams("MULTI", \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.model_xml_str, \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.weights_blob, \
+ Precision::FP32, \
+ multi_device_conf)
// all parameters are unsupported - reversed
#define BEH_US_ALL_MKLDNN BehTestParams("CPU", \
FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.model_xml_str, \
FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.weights_blob, \
Precision::Q78)
+#define BEH_US_ALL_MULTI BehTestParams("MULTI", \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.model_xml_str, \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.weights_blob, \
+ Precision::Q78, \
+ multi_device_conf)
const BehTestParams supportedValues[] = {
BEH_MKLDNN,
+ BEH_MULTI,
};
const BehTestParams requestsSupportedValues[] = {
BEH_MKLDNN,
// the following adds additional test the MKLDNNGraphlessInferRequest (explicitly created for streams)
BEH_MKLDNN.withConfig({{KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
+ BEH_MKLDNN.withConfig({{CONFIG_KEY(CPU_THROUGHPUT_STREAMS),"0"},
+ {CONFIG_KEY(CPU_THREADS_NUM), "1"}}),
+ BEH_MULTI,
};
const BehTestParams allInputSupportedValues[] = {
BEH_MKLDNN_FP16.withIn(Precision::U8).withConfig({{KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
BEH_MKLDNN_FP16.withIn(Precision::U16).withConfig({{KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
BEH_MKLDNN_FP16.withIn(Precision::I16).withConfig({{KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI,
+ BEH_MULTI.withIn(Precision::U8),
+ BEH_MULTI.withIn(Precision::U16),
+ BEH_MULTI.withIn(Precision::I16),
+ BEH_MULTI.withIn(Precision::U8).withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI.withIn(Precision::U16).withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI.withIn(Precision::I16).withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
};
const BehTestParams allOutputSupportedValues[] = {
BEH_MKLDNN,
// the following withConfig test checks the MKLDNNGraphlessInferRequest (explicitly created for streams)
BEH_MKLDNN.withConfig({{KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI.withOut(Precision::FP32),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
};
const BehTestParams typeUnSupportedValues[] = {
BEH_MKLDNN.withIn(Precision::Q78),
BEH_MKLDNN_FP16,
+ BEH_MULTI.withIn(Precision::Q78),
};
const BehTestParams allUnSupportedValues[] = {
BEH_US_ALL_MKLDNN,
+ BEH_US_ALL_MULTI,
};
const std::vector<BehTestParams> withCorrectConfValues = {
BEH_MKLDNN.withConfig({{KEY_CPU_BIND_THREAD, NO}}),
BEH_MKLDNN.withConfig({{KEY_CPU_BIND_THREAD, YES}}),
BEH_MKLDNN.withConfig({{KEY_DYN_BATCH_LIMIT, "10"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_NUMA}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, "8"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_BIND_THREAD, NO}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_BIND_THREAD, YES}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_DYN_BATCH_LIMIT, "10"}}),
};
const BehTestParams withIncorrectConfValues[] = {
BEH_MKLDNN.withConfig({{KEY_CPU_THROUGHPUT_STREAMS, "OFF"}}),
BEH_MKLDNN.withConfig({{KEY_CPU_BIND_THREAD, "OFF"}}),
BEH_MKLDNN.withConfig({{KEY_DYN_BATCH_LIMIT, "NAN"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, "OFF"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_BIND_THREAD, "OFF"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_DYN_BATCH_LIMIT, "NAN"}}),
};
const std::vector<BehTestParams> withCorrectConfValuesPluginOnly;
const std::vector<BehTestParams> withCorrectConfValuesNetworkOnly = {
BEH_MKLDNN.withConfig({}),
+ BEH_MULTI
};
const BehTestParams withIncorrectConfKeys[] = {
BEH_MKLDNN.withIncorrectConfigItem(),
+ BEH_MULTI.withIncorrectConfigItem(),
};
${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
add_library(${TARGET_NAME} STATIC ${SHARED_TESTS_SRC})
-add_dependencies(${TARGET_NAME} inference_engine_preproc)
+add_dependencies(${TARGET_NAME} MultiDevicePlugin inference_engine_preproc)
target_include_directories(${TARGET_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/plugin_tests")
#include <gna/gna_config.hpp>
#include <multi-device/multi_device_config.hpp>
#include <cpp_interfaces/exception2status.hpp>
-#include <tests_utils.hpp>
+#include <common_test_utils/test_assertions.hpp>
#include <memory>
#include <fstream>
// for multi-device the number of Executors is not known (defined by the devices configuration)
} else {
ASSERT_EQ(0u, ExecutorManager::getInstance()->getExecutorsNumber());
- ASSERT_GE(1u, ExecutorManager::getInstance()->getIdleCPUStreamsExecutorsNumber());
+ ASSERT_GE(2u, ExecutorManager::getInstance()->getIdleCPUStreamsExecutorsNumber());
}
}
if (GetParam().device == CommonTestUtils::DEVICE_CPU) {
namespace {
std::string getTestCaseName(testing::TestParamInfo<BehTestParams> obj) {
- return obj.param.device + "_" + obj.param.input_blob_precision.name()
- + (obj.param.config.size() ? "_" + obj.param.config.begin()->second : "");
+ std::string config;
+ for (auto&& cfg : obj.param.config) {
+ config += "_" + cfg.first + "_" + cfg.second;
+ }
+ return obj.param.device + "_" + obj.param.input_blob_precision.name() + config;
}
}
TEST_P(BehaviorPluginTestInferRequest, returnDeviceBusyOnSetBlobAfterAsyncInfer) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
+ auto&& config = GetParam().config;
+ auto itConfig = config.find(CONFIG_KEY(CPU_THROUGHPUT_STREAMS));
+ if (itConfig != config.end()) {
+ if (itConfig->second != "CPU_THROUGHPUT_AUTO") {
+ if (std::stoi(itConfig->second) == 0) {
+ GTEST_SKIP() << "Not applicable with disabled streams";
+ }
+ }
+ }
TestEnv::Ptr testEnv;
ASSERT_NO_FATAL_FAILURE(_createAndCheckInferRequest(GetParam(), testEnv));
Blob::Ptr input;
namespace {
std::string getTestCaseName(testing::TestParamInfo<BehTestParams> obj) {
- return obj.param.device + "_" + obj.param.input_blob_precision.name()
- + (obj.param.config.size() ? "_" + obj.param.config.begin()->second : "");
+ std::string config;
+ for (auto&& cfg : obj.param.config) {
+ config += "_" + cfg.first + "_" + cfg.second;
+ }
+ return obj.param.device + "_" + obj.param.input_blob_precision.name() + config;
}
}
//
#include "behavior_test_plugin.h"
-#include <test_assertions.hpp>
using namespace std;
using namespace ::testing;
class BehaviorPluginTestInput : public BehaviorPluginTest { };
class BehaviorPluginTestOutput : public BehaviorPluginTest { };
-TEST_F(BehaviorPluginTest, AllocateNullBlob) {
+TEST_F(BehaviorPluginTest, smoke_llocateNullBlob) {
TensorDesc tdesc = TensorDesc(Precision::FP32, NCHW);
InferenceEngine::TBlob<float> blob(tdesc);
ASSERT_NO_THROW(blob.allocate());
target_link_libraries(${TARGET_NAME} PRIVATE mvnc)
endif()
- add_test(NAME ${TARGET_NAME}
- COMMAND ${TARGET_NAME})
+ add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME})
+ set_property(TEST ${TARGET_NAME} PROPERTY LABELS VPU MYRIAD)
add_dependencies(${TARGET_NAME} ${DEPENDENCIES})
endfunction(enable_vpu)
BEH_MYRIAD,
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, AOTBehaviorTests, ValuesIn(vpuValues), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, AOTBehaviorTests, ValuesIn(vpuValues), getTestCaseName);
#endif
BEH_MYRIAD,
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, MYRIADBoot, ValuesIn(vpuValues), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, MYRIADBoot, ValuesIn(vpuValues), getTestCaseName);
}
};
-TEST_F(VPUGetMetric, GetThermalStatsFromNetwork) {
+TEST_F(VPUGetMetric, smoke_GetThermalStatsFromNetwork) {
const auto exe_network = loadNetworkOnDevice("MYRIAD");
auto result = Parameter{};
ASSERT_GT(result.as<float>(), 0);
}
-TEST_F(VPUGetMetric, GetThermalStatsFromPlugin) {
+TEST_F(VPUGetMetric, smoke_GetThermalStatsFromPlugin) {
std::vector<std::string> availableDevices;
ASSERT_NO_THROW(availableDevices = getAvailableDevices());
ASSERT_TRUE(!availableDevices.empty());
}
}
-TEST_F(VPUGetMetric, ThermalStatsFromPluginWithIncorrectID) {
+TEST_F(VPUGetMetric, smoke_ThermalStatsFromPluginWithIncorrectID) {
std::vector<std::string> availableDevices;
ASSERT_NO_THROW(availableDevices = getAvailableDevices());
ASSERT_TRUE(!availableDevices.empty());
ASSERT_TRUE(result.empty());
}
-TEST_F(VPUGetMetric, ThermalStatsFromPluginWithoutLoadedNetwork) {
+TEST_F(VPUGetMetric, smoke_ThermalStatsFromPluginWithoutLoadedNetwork) {
std::vector<std::string> availableDevices;
ASSERT_NO_THROW(availableDevices = getAvailableDevices());
ASSERT_TRUE(!availableDevices.empty());
ASSERT_TRUE(result.empty());
}
-TEST_F(VPUGetMetric, MyriadGetAvailableDevices) {
+TEST_F(VPUGetMetric, smoke_MyriadGetAvailableDevices) {
std::vector<std::string> availableDevices;
ASSERT_NO_THROW(availableDevices = getAvailableDevices());
ASSERT_TRUE(!availableDevices.empty());
#include "behavior_test_plugin.h"
#include "helpers/myriad_load_network_case.hpp"
-TEST_F(MyriadLoadNetworkTestCase, ReloadPlugin) {
+TEST_F(MyriadLoadNetworkTestCase, smoke_ReloadPlugin) {
ASSERT_NO_THROW(LoadNetwork());
ASSERT_NO_THROW(LoadNetwork());
}
-TEST_F(MyriadLoadNetworkTestCase, SimpleLoading) {
+TEST_F(MyriadLoadNetworkTestCase, smoke_SimpleLoading) {
auto devices = getDevicesList();
ASSERT_TRUE(devices.size());
ASSERT_TRUE(!IsDeviceAvailable(device_to_load));
}
-TEST_F(MyriadLoadNetworkTestCase, LoadingAtTheSameDevice) {
+TEST_F(MyriadLoadNetworkTestCase, smoke_LoadingAtTheSameDevice) {
auto devices = getDevicesList();
ASSERT_TRUE(devices.size());
ie->LoadNetwork(cnnNetwork, "MYRIAD", config));
}
-TEST_F(MyriadLoadNetworkTestCase, ThrowsExeptionWhenNameIsInvalid) {
+TEST_F(MyriadLoadNetworkTestCase, smoke_ThrowsExeptionWhenNameIsInvalid) {
auto device_to_load = "SomeVeryBadName";
std::map<std::string, std::string> config = {
{KEY_DEVICE_ID, device_to_load},
ie->LoadNetwork(cnnNetwork, "MYRIAD", config));
}
-TEST_F(MyriadLoadNetworkTestCase, ThrowsExeptionWhenPlatformConflictWithProtocol) {
+TEST_F(MyriadLoadNetworkTestCase, smoke_ThrowsExeptionWhenPlatformConflictWithProtocol) {
std::string wrong_platform;
auto devices = getDevicesList();
ASSERT_TRUE(devices.size());
ASSERT_EQ(statusCode, StatusCode::OK) << resp.msg;
}
-INSTANTIATE_TEST_CASE_P(VPUConfigProtocolTests,
+INSTANTIATE_TEST_CASE_P(smoke_VPUConfigProtocolTests,
MyriadProtocolTests,
::testing::ValuesIn(myriadProtocols),
MyriadProtocolTests::getTestCaseName);
\ No newline at end of file
BEH_MYRIAD,
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, MYRIADWatchdog, ValuesIn(vpuValues), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, MYRIADWatchdog, ValuesIn(vpuValues), getTestCaseName);
#include "holders_tests.hpp"
-INSTANTIATE_TEST_CASE_P(ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
+INSTANTIATE_TEST_CASE_P(smoke_ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
// 0 - plugin
// 1 - executable_network
// 2 - infer_request
#include "behavior_test_plugins.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
#include "vpu_test_data.hpp"
INSTANTIATE_TEST_CASE_P(
- BehaviorTest, BehaviorPluginCorrectConfigTest,
+ smoke_BehaviorTest, BehaviorPluginCorrectConfigTest,
ValuesIn(
BehTestParams::concat(
BehTestParams::concat(deviceSpecificConfigurations, deviceAgnosticConfigurations),
getTestCaseName
);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
ValuesIn(withIncorrectConfKeys),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
ValuesIn(supportedValues),
getTestCaseName);
// TODO: currently this tests are not applicable to myriadPlugin
#if 0
-INSTANTIATE_TEST_CASE_P(
+INSTANTIATE_TEST_CASE_P(smoke_
BehaviorTest,
BehaviorPluginTestExecGraphInfo,
ValuesIn(supportedValues),
#include "behavior_test_plugin_infer_request.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
#include "behavior_test_plugin_infer_request_callback.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request_config.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestConfig,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestConfig,
ValuesIn(BehTestParams::concat(deviceAgnosticConfigurations, withCorrectConfValuesNetworkOnly)),
getConfigTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
getConfigTestCaseName);
#include "behavior_test_plugin_infer_request_input.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request_output.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
pool_test_params(CommonTestUtils::DEVICE_MYRIAD, "FP16", pool_case),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, ROIPoolingLayerTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, ROIPoolingLayerTest,
::testing::ValuesIn(roi_pool_test_cases),
getTestName<pool_test_params>);
// FIXME
//#if (defined INSTANTIATE_TESTS)
-//INSTANTIATE_TEST_CASE_P(BehaviorTest, MemoryLayerTest,
+//INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, MemoryLayerTest,
// ::testing::ValuesIn(memory_test_cases),
// getTestName<memory_test_params>);
//#endif
layout_test_params(CommonTestUtils::DEVICE_MYRIAD, "FP16", Layout::CHW, power_params({ { 3, 32, 16 } }, 2, 2, 2)),
layout_test_params(CommonTestUtils::DEVICE_MYRIAD, "FP16", Layout::NCHW, power_params({ { 1, 3, 16, 16 } }, 2, 2, 2)),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanLoadPower,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanLoadPower,
::testing::ValuesIn(power_test_cases), getTestName);
layout_test_params conv_neg_test_cases[] = {
layout_test_params(CommonTestUtils::DEVICE_MYRIAD, "FP16", Layout::NC, power_params({ { 1, 3 } }, 2, 2, 2)),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanNotLoadConv,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanNotLoadConv,
::testing::ValuesIn(conv_neg_test_cases), getTestName);
layout_test_params conv_test_cases[] = {
layout_test_params(CommonTestUtils::DEVICE_MYRIAD, "FP16", Layout::CHW, power_params({ { 3, 32, 16 } }, 2, 2, 2)),
layout_test_params(CommonTestUtils::DEVICE_MYRIAD, "FP16", Layout::NCHW, power_params({ { 1, 3, 16, 16 } }, 2, 2, 2)),
};
- INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanLoadConv,
+ INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanLoadConv,
::testing::ValuesIn(conv_test_cases), getTestName);
#include "behavior_test_plugin_set_preprocess.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest,
BehaviorPluginTestPreProcess,
ValuesIn(supportedValues),
getTestCaseName);
#include "behavior_test_plugin_unsupported.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
getTestCaseName);
- INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestBatchUnsupported, ValuesIn(batchUnSupportedValues),
+ INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestBatchUnsupported, ValuesIn(batchUnSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_version.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
FuncTestUtils::TestModel::convReluNormPoolFcModelFP32.model_xml_str, \
FuncTestUtils::TestModel::convReluNormPoolFcModelFP32.weights_blob, \
Precision::FP32)
+// for multi-device we are testing the fp16 (as it is supported by all device combos we are considering for testing
+// e.g. GPU and VPU, for CPU the network is automatically (internally) converted to fp32.
+#define BEH_MULTI(device) BehTestParams("MULTI", \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.model_xml_str, \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.weights_blob, \
+ Precision::FP32, \
+ {{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, #device}})
+#define BEH_MULTI_CONFIG BehTestParams("MULTI", \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.model_xml_str, \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.weights_blob, \
+ Precision::FP32)
// all parameters are unsupported - reversed
#define BEH_US_ALL_MYRIAD BehTestParams("MYRIAD", \
FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.model_xml_str, \
FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.weights_blob, \
Precision::Q78)
+#define BEH_US_ALL_MULTI(device) BehTestParams("MULTI", \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.model_xml_str, \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.weights_blob, \
+ Precision::Q78, \
+ {{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, #device}})
const BehTestParams supportedValues[] = {
BEH_MYRIAD,
+ BEH_MULTI(MYRIAD),
};
const BehTestParams requestsSupportedValues[] = {
BEH_MYRIAD,
+ BEH_MULTI(MYRIAD),
};
const BehTestParams allInputSupportedValues[] = {
BEH_MYRIAD, BEH_MYRIAD.withIn(Precision::U8), BEH_MYRIAD.withIn(Precision::FP16),
+ BEH_MULTI(MYRIAD), BEH_MULTI(MYRIAD).withIn(Precision::U8), BEH_MULTI(MYRIAD).withIn(Precision::FP16),
// I16 not supported yet
- // (ISSUE-7979) [IE myriad] The plugin should support I16 format for Input
+ // (CVS-7979) [IE myriad] The plugin should support I16 format for Input
//BEH_MYRIAD.withIn(Precision::I16),
};
const BehTestParams allOutputSupportedValues[] = {
BEH_MYRIAD, BEH_MYRIAD.withOut(Precision::FP16),
+ BEH_MULTI(MYRIAD), BEH_MULTI(MYRIAD).withOut(Precision::FP16),
};
const BehTestParams typeUnSupportedValues[] = {
BEH_MYRIAD.withIn(Precision::Q78), BEH_MYRIAD.withIn(Precision::U16), BEH_MYRIAD.withIn(Precision::I8),
BEH_MYRIAD.withIn(Precision::I16), BEH_MYRIAD.withIn(Precision::I32),
+ BEH_MULTI(MYRIAD).withIn(Precision::Q78), BEH_MULTI(MYRIAD).withIn(Precision::U16),
+ BEH_MULTI(MYRIAD).withIn(Precision::I8),
+ BEH_MULTI(MYRIAD).withIn(Precision::I16), BEH_MULTI(MYRIAD).withIn(Precision::I32),
};
const BehTestParams batchUnSupportedValues[] = {
BEH_MYRIAD.withBatchSize(0),
+ BEH_MULTI(MYRIAD).withBatchSize(0),
};
const BehTestParams allUnSupportedValues[] = {
BEH_US_ALL_MYRIAD,
+ BEH_US_ALL_MULTI(MYRIAD),
};
const std::vector<BehTestParams> deviceSpecificConfigurations = {
BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME), CONFIG_VALUE(YES)}}),
BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME), CONFIG_VALUE(NO)}}),
+
+ BEH_MULTI_CONFIG.withConfig({
+ {MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_DEBUG)}
+ }),
+ BEH_MULTI_CONFIG.withConfig({
+ {MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), CONFIG_VALUE(YES)}
+ }),
};
const std::vector<BehTestParams> withCorrectConfValuesPluginOnly = {
BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME), "ON"}}),
BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME), "OFF"}}),
+
+ BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION),"ON"}}),
+ BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {CONFIG_KEY(LOG_LEVEL), "VERBOSE"}}),
+ BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), "ON"}}),
+ BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {VPU_MYRIAD_CONFIG_KEY(PLATFORM), "-1"}}),
+ BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {VPU_MYRIAD_CONFIG_KEY(PLATFORM), "0"}}),
+ BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {VPU_MYRIAD_CONFIG_KEY(PLATFORM), "1"}}),
};
const BehTestParams withIncorrectConfKeys[] = {
BEH_MYRIAD.withIncorrectConfigItem(),
+ BEH_MULTI(MYRIAD).withIncorrectConfigItem(),
};
else()
target_compile_definitions(${TARGET} PRIVATE -DPERF_TEST=0)
endif()
+
+add_test(NAME ${TARGET} COMMAND ${TARGET})
+set_property(TEST ${TARGET} PROPERTY LABELS IE PREPROC)
list(APPEND CLDNN_LIBS
IESharedTests
inference_engine_lp_transformations
- inference_engine_ir_readers
${CLDNN__IOCL_ICD_LIBPATH})
# try to find VA libraries
add_dependencies(${TARGET_NAME} ${DEPENDENCIES})
-add_test(NAME ${TARGET_NAME}
- COMMAND ${TARGET_NAME})
+add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME})
+set_property(TEST ${TARGET_NAME} PROPERTY LABELS GPU)
add_dependencies(${TARGET_NAME} ${DEPENDENCIES})
-add_test(NAME ${TARGET_NAME}
- COMMAND ${TARGET_NAME})
-
-set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})
+add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME})
+set_property(TEST ${TARGET_NAME} PROPERTY LABELS GNA)
if(GNA_LIBRARY_VERSION STREQUAL "GNA1")
target_compile_definitions(${TARGET_NAME} PRIVATE GNA1_LIB)
INSTANTIATE_TEST_CASE_P(
DISABLED_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_NETWORK_NAME,
::testing::Values("GNA", "MULTI:GNA", "HETERO:GNA"));
-//
-// TODO: this metric is not supported by the plugin
+
INSTANTIATE_TEST_CASE_P(
- DISABLED_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS,
- ::testing::Values("GNA", "MULTI:GNA", "HETERO:GNA"));
+ IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS,
+ ::testing::Values("GNA"/*, "MULTI:GNA", "HETERO:GNA" */));
INSTANTIATE_TEST_CASE_P(
IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_ThrowsUnsupported,
list(APPEND MKL_DNN_LIBS
IESharedTests
inference_engine_lp_transformations
- inference_engine_ir_readers
${Boost_REGEX_LIBRARY})
list(APPEND TEST_SRC ${MKL_DNN_TEST_SOURCES})
add_dependencies(${TARGET_NAME} ${DEPENDENCIES})
-add_test(NAME ${TARGET_NAME}
- COMMAND ${TARGET_NAME})
+add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME})
+set_property(TEST ${TARGET_NAME} PROPERTY LABELS CPU)
inferRequest1.Infer();
float* outRawDataWithConfig = inferRequest1.GetBlob(net.getOutputsInfo().begin()->first)->cbuffer().as<float*>();
- auto thr = FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32);
+ float thr1, thr2;
+ FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32, thr1, thr2);
size_t outElementsCount = std::accumulate(begin(fnPtr->get_output_shape(0)), end(fnPtr->get_output_shape(0)), 1,
std::multiplies<size_t>());
- FuncTestUtils::compareRawBuffers(outRawData, outRawDataWithConfig, outElementsCount,
- outElementsCount,
- thr);
+ FuncTestUtils::compareRawBuffers(outRawData, outRawDataWithConfig, outElementsCount, outElementsCount,
+ FuncTestUtils::CompareType::ABS_AND_REL,
+ thr1, thr2);
}
#include <tests_common.hpp>
#include <tests_common_func.hpp>
#include <memory>
-#include <tests_utils.hpp>
#include <multi-device/multi_device_config.hpp>
#include <ie_core.hpp>
#include <ie_plugin_ptr.hpp>
std::shared_ptr<IExtension> ext(new NewTestExtension());
checkNotSharedExtensions(ext, "CPU");
}
-
-TEST_F(smoke_ExtensionTest, MULTI_delete_extension) {
- try {
- InferenceEngine::Core ie;
- ie.GetVersions("MULTI");
- } catch (...) {
- GTEST_SKIP();
- }
- std::shared_ptr<IExtension> ext(new TestExtension());
- checkExtensionRemoved({"MultiDevice", ext, {{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"}}});
-}
-
-TEST_F(smoke_ExtensionTest, MULTI_no_delete_extension_from_another_engine) {
- try {
- InferenceEngine::Core ie;
- ie.GetVersions("MULTI");
- } catch (...) {
- GTEST_SKIP();
- }
- std::shared_ptr<IExtension> ext(new TestExtension());
- checkExtensionNotRemovedFromAnotherEngineObject({"MultiDevice", ext, {{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"}}});
-}
-
-TEST_F(smoke_ExtensionTest, MULTI_no_share_extension_between_engines) {
- try {
- InferenceEngine::Core ie;
- ie.GetVersions("MULTI");
- } catch (...) {
- GTEST_SKIP();
- }
- std::shared_ptr<IExtension> ext(new TestExtension());
- checkNotSharedExtensions(ext, "MULTI:CPU");
-}
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetMetricTest, IEClassGetMetricTest_SUPPORTED_CONFIG_KEYS,
- ::testing::Values("CPU", "HETERO"));
+ ::testing::Values("CPU", "MULTI", "HETERO"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetMetricTest, IEClassGetMetricTest_SUPPORTED_METRICS,
- ::testing::Values("CPU", "HETERO"));
+ ::testing::Values("CPU", "MULTI", "HETERO"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetMetricTest, IEClassGetMetricTest_AVAILABLE_DEVICES,
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetMetricTest, IEClassGetMetricTest_FULL_DEVICE_NAME,
- ::testing::Values("CPU", "HETERO"));
+ ::testing::Values("CPU", "MULTI", "HETERO"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetMetricTest, IEClassGetMetricTest_OPTIMIZATION_CAPABILITIES,
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetMetricTest, IEClassGetMetricTest_ThrowUnsupported,
- ::testing::Values("CPU", "HETERO"));
+ ::testing::Values("CPU", "MULTI", "HETERO"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetConfigTest, IEClassGetConfigTest_ThrowUnsupported,
- ::testing::Values("CPU", "HETERO"));
+ ::testing::Values("CPU", "MULTI", "HETERO"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetAvailableDevices, IEClassGetAvailableDevices,
INSTANTIATE_TEST_CASE_P(
smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS,
- ::testing::Values("CPU", "HETERO:CPU"));
+ ::testing::Values("CPU", "MULTI:CPU", "HETERO:CPU"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_SUPPORTED_METRICS,
- ::testing::Values("CPU", "HETERO:CPU"));
+ ::testing::Values("CPU", "MULTI:CPU", "HETERO:CPU"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_NETWORK_NAME,
- ::testing::Values("CPU", "HETERO:CPU"));
+ ::testing::Values("CPU", "MULTI:CPU", "HETERO:CPU"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS,
- ::testing::Values("CPU", "HETERO:CPU"));
+ ::testing::Values("CPU", "MULTI:CPU", "HETERO:CPU"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_ThrowsUnsupported,
- ::testing::Values("CPU", "HETERO:CPU"));
+ ::testing::Values("CPU", "MULTI:CPU", "HETERO:CPU"));
//
// Executable Network GetConfig / SetConfig
smoke_Inception,
ModelTransformationsTest,
::testing::Values(
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 1ul, false, false, createParam(), {}, 3ul),
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 1ul, false, true, createParamI8I8(), {}, 0, false),
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 1ul, false, true, createParamU8I8(), {}, 0),
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 1ul, false, true, createParamU8U8(), {}, 0),
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 1ul, false, true, createParamCpu().setQuantizedTensorAlignmentOnActivations(LayerTransformation::QuantizedTensorAlignment::UpdateLevel)),
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 1ul, false, true, createParamCpu().setQuantizedTensorAlignmentOnActivations(LayerTransformation::QuantizedTensorAlignment::UpdateIntervals)),
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 1ul, true, false, createParam()),
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 2ul, true, false, createParam())
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, false, createParam(), {}, 3ul),
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, true, createParamI8I8(), {}, 0, false),
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, true, createParamU8I8(), {}, 0),
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, true, createParamU8U8(), {}, 0),
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, true, createParamCpu().setQuantizedTensorAlignmentOnActivations(LayerTransformation::QuantizedTensorAlignment::UpdateLevel)),
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, true, createParamCpu().setQuantizedTensorAlignmentOnActivations(LayerTransformation::QuantizedTensorAlignment::UpdateIntervals)),
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, true, false, createParam()),
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 2ul, true, false, createParam())
),
TransformationsParams::getLowPrecisionTransformerSingleLayerTestName);
smoke_MobileNet,
ModelTransformationsTest,
::testing::Values(
- TransformationsParams("MKLDNN", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false),
+ TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false),
// TODO: eshoguli: fix this issue
-// TransformationsParams("MKLDNN", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamI8I8()),
-// TransformationsParams("MKLDNN", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamU8I8()),
-// TransformationsParams("MKLDNN", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamU8U8(), {}, 2),
-// TransformationsParams("MKLDNN", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamCpu(), { "464/Pool", "465/Pool" }),
- TransformationsParams("MKLDNN", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, true),
- TransformationsParams("MKLDNN", getModelParams("mobilenet_v2_tf_depthwise"), 2ul, true)
+// TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamI8I8()),
+// TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamU8I8()),
+// TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamU8U8(), {}, 2),
+// TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamCpu(), { "464/Pool", "465/Pool" }),
+ TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, true),
+ TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 2ul, true)
),
TransformationsParams::getLowPrecisionTransformerSingleLayerTestName);
smoke_ResNet,
ModelTransformationsTest,
::testing::Values(
- TransformationsParams("MKLDNN", getModelParams("resnet_50_tf"), 1ul, false),
- TransformationsParams("MKLDNN", getModelParams("resnet_50_tf"), 1ul, false, true, createParamI8I8(), {
+ TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, false),
+ TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, false, true, createParamI8I8(), {
// TODO: remove when eltwise validation was added
"resnet_v1_50/block1/unit_2/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars",
"resnet_v1_50/block2/unit_3/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars"
}),
- TransformationsParams("MKLDNN", getModelParams("resnet_50_tf"), 1ul, false, true, createParamU8I8(), {
+ TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, false, true, createParamU8I8(), {
// // TODO: remove when eltwise validation was added
"resnet_v1_50/block1/unit_2/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars",
"resnet_v1_50/block2/unit_3/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars"
}),
- TransformationsParams("MKLDNN", getModelParams("resnet_50_tf"), 1ul, false, true, createParamU8U8(), {
+ TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, false, true, createParamU8U8(), {
// TODO: remove when eltwise validation was added
"resnet_v1_50/block1/unit_2/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars",
"resnet_v1_50/block2/unit_3/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars"
}),
- TransformationsParams("MKLDNN", getModelParams("resnet_50_tf"), 1ul, false, true, createParamCpu(), {
+ TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, false, true, createParamCpu(), {
// TODO: remove when eltwise validation was added
"resnet_v1_50/block1/unit_2/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars",
"resnet_v1_50/block2/unit_3/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars"
}),
- TransformationsParams("MKLDNN", getModelParams("resnet_50_tf"), 1ul, true),
- TransformationsParams("MKLDNN", getModelParams("resnet_50_tf"), 2ul, true)
+ TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, true),
+ TransformationsParams("CPU", getModelParams("resnet_50_tf"), 2ul, true)
),
TransformationsParams::getLowPrecisionTransformerSingleLayerTestName);
::testing::Values(
SingleLayerTransformationsTestParams(
"CPU",
+ PowerTestModel::Ptr(new PowerTestModel(1.f, 1.f, 0)),
+ { { 1, 3, 299, 299 } },
+ { { 1, 3, 299, 299 } }),
+
+ SingleLayerTransformationsTestParams(
+ "CPU",
+ PowerTestModel::Ptr(new PowerTestModel(1.f, 2.89f, 64)),
+ { { 1, 3, 299, 299 } },
+ { { 1, 3, 299, 299 } }),
+
+ SingleLayerTransformationsTestParams(
+ "CPU",
+ PowerTestModel::Ptr(new PowerTestModel(1.f, -32.f, 0)),
+ { { 1, 3, 299, 299 } },
+ { { 1, 3, 299, 299 } }),
+
+ SingleLayerTransformationsTestParams(
+ "CPU",
+ PowerTestModel::Ptr(new PowerTestModel(1.f, 1.f, -64.f)),
+ { { 1, 3, 299, 299 } },
+ { { 1, 3, 299, 299 } }),
+
+ SingleLayerTransformationsTestParams(
+ "CPU",
+ PowerTestModel::Ptr(new PowerTestModel(3.5f, 1.f, 0)),
+ { { 1, 3, 299, 299 } },
+ { { 1, 3, 299, 299 } }),
+
+ SingleLayerTransformationsTestParams(
+ "CPU",
SingleLayerTestModel::Ptr(new ResampleTestModel()),
{ { 1, 32, 149, 149 } },
{ { 1, 32, 147, 147 } }),
${NGRAPH_LIBRARIES}
ie_tests
ngraphFunctions
- inference_engine_ir_readers
)
file(GLOB SHARED_TESTS_SRC
)
add_library(${TARGET_NAME} STATIC ${SHARED_TESTS_SRC})
-add_dependencies(${TARGET_NAME} inference_engine_preproc mock_engine)
+add_dependencies(${TARGET_NAME} inference_engine_preproc MultiDevicePlugin mock_engine)
if(ENABLE_MKL_DNN)
add_dependencies(${TARGET_NAME} MKLDNNPlugin)
}
template<int Version = 3>
- static details::CNNNetworkImplPtr
+ static InferenceEngine::CNNNetwork
buildSingleLayerNetwork(const std::string &layerType,
const CommonTestUtils::InOutShapes &inOutShapes,
std::map<std::string, std::string> *params,
size_t weightsSize = 0,
size_t biasesSize = 0,
const TBlob<uint8_t>::Ptr &weights = nullptr) {
- auto *parser = new details::FormatParser(Version);
- return buildSingleLayerNetworkCommon<Version>(parser, layerType, inOutShapes, params, layerDataName, precision,
+ return buildSingleLayerNetworkCommon<Version>(layerType, inOutShapes, params, layerDataName, precision,
weightsSize, biasesSize, weights);
}
auto weights = createWeights(elementSize, weightByteSize, biasByteSize);
- auto networkImplPtr = buildSingleLayerNetwork<3>(layerHelper->getType(), initialShapes, ¶ms, "data",
- pluginParams.precision, weightByteSize, biasByteSize, weights);
+ auto network = buildSingleLayerNetwork<3>(layerHelper->getType(), initialShapes, ¶ms, "data",
+ pluginParams.precision, weightByteSize, biasByteSize, weights);
- CNNNetwork network(networkImplPtr);
std::tie(inputName, inputData) = (*network.getInputsInfo().begin());
inputData->setPrecision(pluginParams.precision);
inputData->setLayout(pluginParams.layout);
using namespace InferenceEngine::details;
using namespace InferenceEngine::PluginConfigParams;
+#define CHECK_MULTI() do { \
+ try { \
+ Core ie; \
+ ie.GetVersions("MULTI"); \
+ } catch (...) { \
+ GTEST_SKIP(); \
+ } \
+ } while(false)\
+
class IEClassBasicTest : public TestsCommon {
public:
void SetUp() override {
GTEST_COUT << "Core created " << testIndex << std::endl;
ASSERT_NO_THROW(ie.RegisterPlugins(wStringtoMBCSstringChar(pluginsXmlW)));
CommonTestUtils::removeFile(pluginsXmlW);
-
+#if defined __linux__ && !defined(__APPLE__)
+ ASSERT_NO_THROW(ie.GetVersions("mock")); // from pluginXML
+#endif
ASSERT_NO_THROW(ie.GetVersions(deviceName));
GTEST_COUT << "Plugin created " << testIndex << std::endl;
}
TEST_F(IEClassBasicTest, smoke_ImportNetworkMultiThrows) {
- Core ie;
-
+ CHECK_MULTI();
+ InferenceEngine::Core ie;
ASSERT_THROW(ie.ImportNetwork("model", "MULTI"), InferenceEngineException);
}
}
TEST_P(IEClassNetworkTestP, QueryNetworkMultiThrows) {
+ CHECK_MULTI();
Core ie;
ASSERT_THROW(ie.QueryNetwork(actualNetwork, "MULTI"), InferenceEngineException);
}
//
TEST_P(IEClassLoadNetworkTest, LoadNetworkHETEROwithMULTINoThrow) {
- Core ie;
+ CHECK_MULTI();
+ Core ie;
if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) {
std::string devices;
auto availableDevices = ie.GetMetric(deviceName, METRIC_KEY(AVAILABLE_DEVICES)).as<std::vector<std::string>>();
}
TEST_P(IEClassLoadNetworkTest, LoadNetworkMULTIwithHETERONoThrow) {
+ CHECK_MULTI();
Core ie;
if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) {
//
TEST_P(IEClassLoadNetworkTest, QueryNetworkHETEROwithMULTINoThrowv7) {
+ CHECK_MULTI();
Core ie;
if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) {
}
TEST_P(IEClassLoadNetworkTest, QueryNetworkMULTIwithHETERONoThrowv7) {
+ CHECK_MULTI();
Core ie;
if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) {
}
TEST_P(IEClassLoadNetworkTest, QueryNetworkHETEROwithMULTINoThrowv10) {
+ CHECK_MULTI();
Core ie;
if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) {
}
TEST_P(IEClassLoadNetworkTest, DISABLED_QueryNetworkMULTIwithHETERONoThrowv10) {
+ CHECK_MULTI();
Core ie;
if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) {
using IEClassLoadNetworkAfterCoreRecreateTest = IEClassLoadNetworkTest;
TEST_P(IEClassLoadNetworkAfterCoreRecreateTest, LoadAfterRecreateCoresAndPlugins) {
+ CHECK_MULTI();
+ Core ie;
{
- Core ie;
auto versions = ie.GetVersions("MULTI:" + deviceName + ",CPU");
ASSERT_EQ(3, versions.size());
}
auto refOutData = ngraph::helpers::inferFnWithInterp<ngraph::element::Type_t::f32>(fnPtr,
{blob->cbuffer().as<float *>()});
- auto thr = FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32);
+ float thr1, thr2;
+ FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32, thr1, thr2);
+
std::vector<size_t> inShapeLimited{size_t(bsz), 4, 20, 20};
size_t outElementsCount = std::accumulate(begin(inShapeLimited), end(inShapeLimited), 1, std::multiplies<size_t>());
- FuncTestUtils::compareRawBuffers(outRawData, *refOutData[0], outElementsCount, outElementsCount, thr);
+ FuncTestUtils::compareRawBuffers(outRawData, *refOutData[0], outElementsCount, outElementsCount,
+ FuncTestUtils::CompareType::ABS_AND_REL,
+ thr1, thr2);
if (GetParam().deviceName.find(CommonTestUtils::DEVICE_GPU) != std::string::npos) {
PluginCache::get().reset();
}
}
struct network_params {
- std::string pluginName;
+ std::string deviceName;
std::string modelFile;
std::string imageName;
std::string statFile;
result += statFile;
return result;
}
-
- std::string plugin() { return pluginName + "Plugin"; }
-
- std::string deviceName() {
- if (pluginName == "MultiDevice") {
- return "MULTI:CPU";
- }
- if (pluginName == "MKLDNN") {
- return "CPU";
- }
-
- return "";
- }
-
};
static LayerTransformation::Params createParam() {
const size_t classesCanBeChangedIndex = 9999,
const bool compareRawValues = true,
const std::unordered_set<std::string>& removedLayers = {}) :
- pluginName(""),
+ deviceName(""),
modelParams(ModelParams("", "", "", {})),
batchSize(1ul),
transformationsInPluginEnabled(transformationsInPluginEnabled),
removedLayers(removedLayers) {}
TransformationsParams(
- const std::string pluginName,
+ const std::string deviceName,
const ModelParams modelParams,
const size_t batchSize,
const bool transformationsInPluginEnabled = true,
const std::unordered_set<std::string>& removedLayers = {},
const std::vector<std::pair<std::string, std::vector<float>>> inputs = {},
const std::vector<std::pair<std::string, std::shared_ptr<LayerTransformation>>> transformations = {}) :
- pluginName(pluginName),
+ deviceName(deviceName),
modelParams(modelParams),
batchSize(batchSize),
transformationsInPluginEnabled(transformationsInPluginEnabled),
compareRawValues(compareRawValues),
removedLayers(removedLayers) {}
- const std::string pluginName;
+ const std::string deviceName;
const ModelParams modelParams;
const size_t batchSize;
// transformationsParams.transformationsInPluginEnabled ? PluginConfigParams::YES : PluginConfigParams::NO);
usedNetwork = cloneNet(network);
- ExecutableNetwork exeNetwork = ie.LoadNetwork(network, p.deviceName(), config);
+ ExecutableNetwork exeNetwork = ie.LoadNetwork(network, p.deviceName, config);
InferRequest inferRequest = exeNetwork.CreateInferRequest();
if (inputs.empty()) {
Blob::Ptr src = readInput(p.image(), batch_size);
}
network_params p{
- "MKLDNN",
+ "CPU",
transformationsParam.modelParams.irFilePath,
transformationsParam.modelParams.dataFilePath,
"",
protected:
virtual void RunTITest(const std::map<std::string, std::string> & config = {}) {
-
try {
ti_test_params p = param();
std::string model = getModel(p);
auto weights = make_shared_blob<uint8_t>(TensorDesc {Precision::U8, {p.precision.size() * p.tensorSize}, C});
weights->allocate();
- auto weights_size = details::product(std::begin(weights->getTensorDesc().getDims()), std::end(weights->getTensorDesc().getDims()));
+
if (p.precision == Precision::FP32) {
- std::vector<float> weights_vector(weights_size, 1.0f);
- ie_memcpy(weights->buffer().as<float *>(), sizeof(float), &weights_vector[0], weights_vector.size() * sizeof(float));
- } else {
+ std::vector<float> weights_vector(p.tensorSize, 1.0f);
+ ie_memcpy(weights->buffer().as<float *>(), p.tensorSize * sizeof(float),
+ &weights_vector[0], p.tensorSize * sizeof(float));
+ } else if (p.precision == Precision::FP16) {
// FP16 case
- std::vector<ie_fp16> weights_vector(weights_size, PrecisionUtils::f32tof16(1.0f));
- ie_memcpy(weights->buffer().as<ie_fp16 *>(), sizeof(ie_fp16), &weights_vector[0], weights_vector.size() * sizeof(ie_fp16));
+ std::vector<ie_fp16> weights_vector(p.tensorSize, PrecisionUtils::f32tof16(1.0f));
+ ie_memcpy(weights->buffer().as<ie_fp16 *>(), p.tensorSize * sizeof(ie_fp16),
+ &weights_vector[0], p.tensorSize * sizeof(ie_fp16));
+ } else {
+ ASSERT_TRUE(false);
}
Core ie;
std::map<std::string, std::string> fake_quantize_params = {
{"levels", "256"}
};
+
std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
+ {"power", "2"}, {"scale", "1"}, {"shift", "0"}
};
std::vector<std::pair<std::string, std::string>> edges = {
std::vector<size_t> biasesConvolutionConstDims = { conv.out_c };
std::map<std::string, std::string> const_params = {};
std::map<std::string, std::string> fakeQuantizeParams = {{ "levels", "256" }};
- std::map<std::string, std::string> power_params = {{"power", "1"}, {"scale", "1"}, {"shift", "0"}};
+ std::map<std::string, std::string> power_params = {{"power", "2"}, {"scale", "1"}, {"shift", "0"}};
std::map<std::string, std::string> poolingParams = { {"kernel", "7,1"}, { "pool-method", "avg" }, { "strides", "1,1" } };
std::vector<std::pair<std::string, std::string>> edges = {
std::string getModel(SingleLayerTransformationsTestParams& p) const override;
};
+class PowerTestModel : public SingleLayerTestModel {
+public:
+ PowerTestModel(const float& power, const float& scale, const float& shift) : power(power), scale(scale), shift(shift) {}
+ void resetTransformation(CNNNetwork& network) const override;
+ std::string getName() const override;
+ bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
+ std::string getModel(SingleLayerTransformationsTestParams& p) const override;
+
+private:
+ const float power;
+ const float scale;
+ const float shift;
+};
+
class ConvolutionAndQuantizeOnWeightsWithMultiOutputIntervalsTestModel : public SingleLayerTestModel {
public:
std::string getModel(SingleLayerTransformationsTestParams& p) const override;
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision_transformer_single_layer_tests.hpp"
+
+std::string PowerTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
+ size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
+ if (p._network_precision == "FP16")
+ type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
+
+ CommonTestUtils::pool_common_params pooling = { {1, 1}, {1, 1}, {0, 0}, {0, 0}, "valid", false, true };
+ std::vector<size_t> poolOutShape(p.inputDimensions[0].size());
+ CommonTestUtils::getPoolOutShape(p.inputDimensions[0], pooling, poolOutShape);
+
+ std::map<std::string, std::string> power_params = {{"power", std::to_string(power)}, {"scale", std::to_string(scale)}, {"shift", std::to_string(shift)}};
+ std::map<std::string, std::string> const_params = {};
+ std::map<std::string, std::string> fake_quantize_params = {{"levels", "256"}};
+
+ std::vector<std::pair<std::string, std::string>> edges = {
+ {"0,0", "1,1"}, {"1,2", "6,7"}, // power
+ {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // const
+ {"6,12", "7,13"}, {"7,14", "8,15"} // pool, power
+ };
+
+ return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
+ "Conv_ScaleShift_transformations", p.inputDimensions[0], p._network_precision)
+ .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
+ .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
+ .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
+ .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
+ .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
+ .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
+ .poolingLayer(p._network_precision, {{p.inputDimensions[0]}, {poolOutShape}}, pooling)
+ .addLayer("Power", p._network_precision, &power_params, {{poolOutShape}, {poolOutShape}})
+ .finish(&edges);
+}
+
+void PowerTestModel::resetTransformation(CNNNetwork& network) const {
+ fillData(getLayer(network, "Const2"), 63.5, "custom");
+ fillData(getLayer(network, "Const3"), 127.0, "custom");
+ fillData(getLayer(network, "Const4"), 63.5, "custom");
+ fillData(getLayer(network, "Const5"), 127.0, "custom");
+}
+
+std::string PowerTestModel::getName() const {
+ return std::string("PowerTestModel") +
+ (power == 1.f ? std::string("") : "_power!=1") +
+ (scale == 1.f ? "" : "_scale=" + std::to_string(scale)) +
+ (shift == 0 ? "" : "_shift!=" + std::to_string(shift));
+}
+
+bool PowerTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
+ LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params));
+ transformer.transform(network);
+
+ const Precision precision = params.updatePrecisions ? Precision(Precision::U8) : network.getPrecision();
+
+ CNNLayerPtr fakeQuantize = getLayer(network, "FakeQuantize6");
+ if (fakeQuantize->outData[0]->getPrecision() != precision) {
+ THROW_IE_EXCEPTION << fakeQuantize->name << " precision " << precision << " is not correct";
+ }
+
+ CNNLayerPtr pooling = getLayer(network, "Pooling7");
+ if (pooling->outData[0]->getPrecision() != precision) {
+ THROW_IE_EXCEPTION << pooling->name << " precision " << precision << " is not correct";
+ }
+
+ CNNLayerPtr powerLayer = getLayer(network, "Power8");
+
+ const bool deleteLayer = params.quantizeOutputs && power == 1.f && powerLayer != nullptr && powerLayer->type == "Power";
+
+ if (deleteLayer) {
+ THROW_IE_EXCEPTION << "Power layer is present after transformation";
+ }
+
+ return true;
+}
std::map<std::string, std::string> const_params = {};
std::map<std::string, std::string> fake_quantize_params = {{"levels", "256"}};
- std::map<std::string, std::string> power_params = {{"power", "1"}, {"scale", "1"}, {"shift", "0"}};
+ std::map<std::string, std::string> power_params = {{"power", "2"}, {"scale", "1"}, {"shift", "0"}};
std::vector<std::pair<std::string, std::string>> edges = {
{"0,0", "1,1"}, {"1,2", "6,7"}, // Power
{"axis", "1"}
};
std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
+ {"power", "2"}, {"scale", "1"}, {"shift", "0"}
};
std::vector<std::pair<std::string, std::string>> edges = {
std::map<std::string, std::string> const_params = {};
std::map<std::string, std::string> fake_quantize_params = { {"levels", "256"} };
- std::map<std::string, std::string> power_params = { {"power", "1"}, {"scale", "1"}, {"shift", "0"} };
+ std::map<std::string, std::string> power_params = { {"power", "2"}, {"scale", "1"}, {"shift", "0"} };
std::vector<std::pair<std::string, std::string>> edges = {
{"0,0", "1,1"}, // Input -> Power
{"1,2", "6,7"}, // Power -> FakeQuantize
const float threshold = p.model->getThreshold(p.device_name, net_precision, param);
const float zeroThreshold = p.model->getZeroThreshold();
- // const float threshold = net_precision == Precision::FP16 ? 0.0005f : 0.0003f;
+
+ const auto outName = transformedOutput.find(name);
+ if (outName == transformedOutput.end()) {
+ THROW_IE_EXCEPTION << "Original output name " + name + " doesn't exist in transformed model";
+ }
+
relative_compare(
- CNNNetworkHelper::getFloatData(transformedOutput.find(name)->second).get(),
+ CNNNetworkHelper::getFloatData(outName->second).get(),
CNNNetworkHelper::getFloatData(originalOutput.second).get(),
outSize,
threshold,
${CMAKE_CURRENT_SOURCE_DIR}/common/regression/helpers
LINK_LIBRARIES
IESharedTests
- inference_engine_ir_readers
vpu_graph_transformer
vpu_custom_kernels
DEFINES
LINK_LIBRARIES
IESharedTests
vpu_custom_kernels
- inference_engine_ir_readers
LINK_LIBRARIES_WHOLE_ARCHIVE
myriadTestData
VPUCommonTests
#include "myriad_layers_CTCDecoder_test.hpp"
-INSTANTIATE_TEST_CASE_P(myriad, myriadCTCDecoderLayerTests_nightly,
- ::testing::Combine(
- ::testing::Values(true, false),
- ::testing::ValuesIn(s_DimsConfig)));
+INSTANTIATE_TEST_CASE_P(
+ accuracy, myriadCTCDecoderLayerTests_smoke,
+ ::testing::Combine(
+ ::testing::Values<Dims>({{1, 88, 1, 71}}),
+ ::testing::Values<HwOptimization>(true, false),
+ ::testing::Values<IRVersion>(IRVersion::v7, IRVersion::v10),
+ ::testing::ValuesIn(s_CustomConfig)
+));
#define ERROR_BOUND 0.2f
-typedef struct {
- SizeVector src_dims;
- SizeVector seq_ind_dims;
- SizeVector dst_dims;
- std::string custom_config;
-} dims_config;
+PRETTY_PARAM(CustomConfig, std::string);
+PRETTY_PARAM(HwOptimization, bool);
-PRETTY_PARAM(hwAcceleration, std::string);
-PRETTY_PARAM(dimsConfig, dims_config);
-
-typedef myriadLayerTestBaseWithParam<std::tuple<bool, dims_config>> myriadCTCDecoderLayerTests_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, HwOptimization, IRVersion, CustomConfig>>
+ myriadCTCDecoderLayerTests_smoke;
void refCTCDecoder(const Blob::Ptr src, const Blob::Ptr seq_ind, Blob::Ptr dst) {
ie_fp16 *src_data = static_cast<ie_fp16*>(src->buffer());
}
}
-TEST_P(myriadCTCDecoderLayerTests_nightly, CTCGreedyDecoder) {
-
- bool HWConfigValue = std::get<0>(GetParam());
- dims_config dimsConfig = std::get<1>(GetParam());
-
- if(!dimsConfig.custom_config.empty() && !CheckMyriadX()) {
- GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
- }
+TEST_P(myriadCTCDecoderLayerTests_smoke, CTCGreedyDecoder) {
+ const tensor_test_params dims = std::get<0>(GetParam());
+ const bool hwOptimization = std::get<1>(GetParam());
+ _irVersion = std::get<2>(GetParam());
+ const std::string customConfig = std::get<3>(GetParam());
- _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = dimsConfig.custom_config;
+ if (!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
+ }
- IN_OUT_desc inputTensors;
- IN_OUT_desc outputTensors;
+ _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- inputTensors.resize(2);
- outputTensors.resize(1);
-
- inputTensors[0] = dimsConfig.src_dims;
- inputTensors[1] = dimsConfig.seq_ind_dims;
- outputTensors[0] = dimsConfig.dst_dims;
+ const auto inputTensors = IN_OUT_desc{{dims.c, dims.h, dims.w}, {dims.h, dims.c}};
+ const auto outputTensors = IN_OUT_desc{{1, 1, dims.h, dims.c}};
SetInputTensors(inputTensors);
SetOutputTensors(outputTensors);
- ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("CTCGreedyDecoder"), NetworkInitParams().useHWOpt(HWConfigValue)));
+ std::map<std::string, std::string> params;
+ params["ctc_merge_repeated"] = "1";
- auto iter = _inputMap.begin();
- auto first_input = iter->first;
- ++iter;
- auto second_input = iter->first;
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("CTCGreedyDecoder").params(params),
+ NetworkInitParams()
+ .useHWOpt(hwOptimization)
+ .layoutPreference(vpu::LayoutPreference::ChannelMajor)
+ .lockLayout(true)));
- Blob::Ptr data;
- auto dataBlob = _inputMap[first_input];
+ auto dataBlob = _inputMap.begin()->second;
+ auto seqIndBlob = std::next(_inputMap.begin())->second;
- auto seqIndBlob = _inputMap[second_input];
- uint16_t *blobRawSeqFp16 = seqIndBlob->buffer().as<uint16_t *>();
- size_t count = seqIndBlob->size();
- blobRawSeqFp16[0] = PrecisionUtils::f32tof16(0.0);
- for (size_t indx = 1; indx < count; ++indx) {
- blobRawSeqFp16[indx] = PrecisionUtils::f32tof16(1.0);
+ auto seqIndFp16 = seqIndBlob->buffer().as<uint16_t *>();
+ seqIndFp16[0] = PrecisionUtils::f32tof16(0.0);
+ for (size_t i = 1; i < seqIndBlob->size(); ++i) {
+ seqIndFp16[i] = PrecisionUtils::f32tof16(1.0);
}
std::string inputTensorBinary = TestDataHelpers::get_data_path() + "/vpu/InputGreedyDecoderMyriadCHW.bin";
ASSERT_TRUE(Infer());
- auto outputBlob = _outputMap.begin()->second;
-
- _refBlob = make_shared_blob<ie_fp16>(TensorDesc(Precision::FP16, outputBlob->getTensorDesc().getDims(), ANY));
- _refBlob->allocate();
-
refCTCDecoder(dataBlob, seqIndBlob, _refBlob);
- CompareCommonAbsolute(outputBlob, _refBlob, 0.0);
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, 0.0);
}
-static std::vector<dims_config> s_DimsConfig = {
- {{88, 1, 71}, {88, 1}, {1, 88, 1, 1}, ""},
+static std::vector<CustomConfig> s_CustomConfig = {
+ {""},
#ifdef VPU_HAS_CUSTOM_KERNELS
- {{88, 1, 71}, {88, 1}, {1, 88, 1, 1}, getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"},
+ getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"
#endif
};
#include "myriad_layers_batch_normalization_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsBatchNormalization_nightly,
+ accuracy, myriadLayersTestsBatchNormalization_smoke,
::testing::Values(
bn_test_params{{1, 1, 16, 8}, 0.001f},
bn_test_params{{1, 4, 8, 16}, 0.00001f},
}
}
-class myriadLayersTestsBatchNormalization_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsBatchNormalization_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<bn_test_params> {
};
-TEST_P(myriadLayersTestsBatchNormalization_nightly, TestsBatchNorm)
+TEST_P(myriadLayersTestsBatchNormalization_smoke, TestsBatchNorm)
{
bn_test_params p = ::testing::WithParamInterface<bn_test_params>::GetParam();
size_t sz_weights = p.in.c;
#include "myriad_layers_bias_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsBias_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsBias_smoke,
::testing::ValuesIn(s_biasDims)
);
}
}
-class myriadLayersTestsBias_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsBias_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<InferenceEngine::SizeVector> {
};
-TEST_P(myriadLayersTestsBias_nightly, TestsBias) {
+TEST_P(myriadLayersTestsBias_smoke, TestsBias) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
auto input_dim = GetParam();
InferenceEngine::SizeVector input_dim1;
using namespace InferenceEngine;
using namespace ::testing;
-typedef myriadLayerTestBaseWithParam<std::string> myriadBlobTests_nightly;
+typedef myriadLayerTestBaseWithParam<std::string> myriadBlobTests_smoke;
std::vector<char> readBinFile(std::string filename)
{
return vec;
}
-TEST_P(myriadBlobTests_nightly, CanGetSameBlobsOnSameIR) {
+TEST_P(myriadBlobTests_smoke, CanGetSameBlobsOnSameIR) {
std::string HWConfigValue = GetParam();
auto fnPtr = ngraph::builder::subgraph::makeSplitConvConcat();
}
}
-INSTANTIATE_TEST_CASE_P(accuracy, myriadBlobTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadBlobTests_smoke,
::testing::Values(CONFIG_VALUE(YES), CONFIG_VALUE(NO))
);
-using myriadBlobExportTests_nightly = myriadLayersTests_nightly;
+using myriadBlobExportTests_smoke = myriadLayersTests_nightly;
-TEST_F(myriadBlobExportTests_nightly, CanNotDoImportOnNonExistFile)
+TEST_F(myriadBlobExportTests_smoke, CanNotDoImportOnNonExistFile)
{
InferenceEngine::IExecutableNetwork::Ptr importedNetworkPtr;
ASSERT_EQ(StatusCode::NETWORK_NOT_READ, _vpuPluginPtr->ImportNetwork(importedNetworkPtr, "I_dont_exist.blob", {}, nullptr));
}
-TEST_F(myriadBlobExportTests_nightly, CanInferImportedNetworkOnExportedBlob)
+TEST_F(myriadBlobExportTests_smoke, CanInferImportedNetworkOnExportedBlob)
{
auto fnPtr = ngraph::builder::subgraph::makeSplitConvConcat();
ASSERT_NO_THROW(_cnnNetwork = CNNNetwork(fnPtr));
ASSERT_EQ(StatusCode::OK, inferRequest->Infer(&_resp)) << _resp.msg;
}
-TEST_F(myriadBlobExportTests_nightly, CanGetPerfCountsImportedNetwork)
+TEST_F(myriadBlobExportTests_smoke, CanGetPerfCountsImportedNetwork)
{
auto fnPtr = ngraph::builder::subgraph::makeSplitConvConcat();
ASSERT_NO_THROW(_cnnNetwork = CNNNetwork(fnPtr));
}
}
-class myriadConfigsWithBlobImportTests_nightly: public myriadLayersTests_nightly {
+class myriadConfigsWithBlobImportTests_smoke: public myriadLayersTests_nightly {
protected:
// use this stream to redirect cout to it,
// needs to be able check output on warnings
};
-TEST_F(myriadConfigsWithBlobImportTests_nightly, TryingToSetCompileOptionPrintsWarning)
+TEST_F(myriadConfigsWithBlobImportTests_smoke, TryingToSetCompileOptionPrintsWarning)
{
auto fnPtr = ngraph::builder::subgraph::makeSplitConvConcat();
ASSERT_NO_THROW(_cnnNetwork = CNNNetwork(fnPtr));
}
}
-TEST_F(myriadConfigsWithBlobImportTests_nightly, TryingToSetRuntimeOptionDoesNotPrintWarning)
+TEST_F(myriadConfigsWithBlobImportTests_smoke, TryingToSetRuntimeOptionDoesNotPrintWarning)
{
auto fnPtr = ngraph::builder::subgraph::makeSplitConvConcat();
ASSERT_NO_THROW(_cnnNetwork = CNNNetwork(fnPtr));
}
-using myriadBlobExportAccuracyDifferentCountInAndOutTests_nightly = myriadLayerTestBaseWithParam<std::vector<size_t>>;
+using myriadBlobExportAccuracyDifferentCountInAndOutTests_smoke = myriadLayerTestBaseWithParam<std::vector<size_t>>;
-TEST_F(myriadBlobExportAccuracyDifferentCountInAndOutTests_nightly, IsResultOfImportedAndGeneratedModelSame)
+TEST_F(myriadBlobExportAccuracyDifferentCountInAndOutTests_smoke, IsResultOfImportedAndGeneratedModelSame)
{
SetSeed(DEFAULT_SEED_VALUE);
}
-using myriadBlobExportAccuracyDifferentPrecisionOfInAndOutTests_nightly = myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::Precision, InferenceEngine::Precision>>;
+using myriadBlobExportAccuracyDifferentPrecisionOfInAndOutTests_smoke = myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::Precision, InferenceEngine::Precision>>;
-TEST_P(myriadBlobExportAccuracyDifferentPrecisionOfInAndOutTests_nightly, IsResultOfImportedAndGeneratedModelSame)
+TEST_P(myriadBlobExportAccuracyDifferentPrecisionOfInAndOutTests_smoke, IsResultOfImportedAndGeneratedModelSame)
{
SetSeed(DEFAULT_SEED_VALUE);
InferenceEngine::Precision inputPrecision = std::get<0>(GetParam());
CompareCommonAbsolute(importedOutputBlobPtr, outputBlobPtr, 0.f);
}
-using myriadExtraTests_nightly = myriadLayersTests_nightly;
+using myriadExtraTests_smoke = myriadLayersTests_nightly;
-TEST_F(myriadExtraTests_nightly, ThereIsNoSegfaultOnZeroConvolutionWeights) {
+TEST_F(myriadExtraTests_smoke, ThereIsNoSegfaultOnZeroConvolutionWeights) {
if (!CheckMyriadX()) {
SKIP() << "Non-MyriadX device";
}
static const std::vector<InferenceEngine::Precision> outputPrecisions = {InferenceEngine::Precision::FP16, InferenceEngine::Precision::FP32};
-INSTANTIATE_TEST_CASE_P(accuracy, myriadBlobExportAccuracyDifferentPrecisionOfInAndOutTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadBlobExportAccuracyDifferentPrecisionOfInAndOutTests_smoke,
::testing::Combine(::testing::ValuesIn(inputPrecisions), ::testing::ValuesIn(outputPrecisions)));
\ No newline at end of file
#include "myriad_layers_clamp_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsClampParams_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsClampParams_smoke,
::testing::Combine(
::testing::ValuesIn(s_clampTensors),
::testing::ValuesIn(s_clampParams))
};
};
-typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, clamp_test_params>> myriadLayersTestsClampParams_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, clamp_test_params>> myriadLayersTestsClampParams_smoke;
-TEST_P(myriadLayersTestsClampParams_nightly, TestsClamp) {
+TEST_P(myriadLayersTestsClampParams_smoke, TestsClamp) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
auto param = GetParam();
SizeVector tensor = std::get<0>(param);
#include "myriad_layers_concat_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsConcat_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsConcat_smoke,
::testing::Combine(
::testing::ValuesIn(s_concatCores),
::testing::ValuesIn(s_axis),
getTestCaseName
);
-TEST_F(myriadLayersTestsConcat_nightly, ConcatAfterNormalize) {
+TEST_F(myriadLayersTestsConcat_smoke, ConcatAfterNormalize) {
const std::string model = R"V0G0N(
<Net name="ConcatAfterNormalize" version="2" batch="1">
<layers>
using namespace InferenceEngine;
using myriadConcatTestParams = std::tuple<InferenceEngine::SizeVector, int32_t, InferenceEngine::SizeVector, int32_t, int32_t >;
-typedef myriadLayerTestBaseWithParam<myriadConcatTestParams> myriadLayersTestsConcat_nightly;
+typedef myriadLayerTestBaseWithParam<myriadConcatTestParams> myriadLayersTestsConcat_smoke;
void CheckOutput(const InferenceEngine::BlobMap& input, InferenceEngine::Blob::Ptr actual, int32_t axis) {
int32_t OW = 1;
ASSERT_NE(n_checks, 0);
}
-TEST_P(myriadLayersTestsConcat_nightly, Concat) {
+TEST_P(myriadLayersTestsConcat_smoke, Concat) {
auto param = GetParam();
auto core = std::get<0>(param);
auto axis = std::get<1>(param);
//
//----------------------------------------------------------------------
-INSTANTIATE_TEST_CASE_P(tricky_ncdhw_userpad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(tricky_ncdhw_userpad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 19, 65, 47}),
Values(KernelShape {1, 3, 5}),
)
);
-INSTANTIATE_TEST_CASE_P(tricky_ncdhw_autopad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(tricky_ncdhw_autopad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 19, 65, 47}),
Values(KernelShape {1, 3, 5}),
//
//----------------------------------------------------------------------
-INSTANTIATE_TEST_CASE_P(simple_ncdhw_userpad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(simple_ncdhw_userpad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 20, 64, 48}),
Values(KernelShape {3, 3, 3}),
)
);
-INSTANTIATE_TEST_CASE_P(simple_ncdhw_autopad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(simple_ncdhw_autopad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 20, 64, 48}),
Values(KernelShape {3, 3, 3}),
//
//----------------------------------------------------------------------
-INSTANTIATE_TEST_CASE_P(tricky_nchw_userpad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(tricky_nchw_userpad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 65, 47}),
Values(KernelShape {1, 3}),
)
);
-INSTANTIATE_TEST_CASE_P(tricky_nchw_autopad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(tricky_nchw_autopad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 65, 47}),
Values(KernelShape {1, 3}),
//
//----------------------------------------------------------------------
-INSTANTIATE_TEST_CASE_P(simple_nchw_userpad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(simple_nchw_userpad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 64, 48}),
Values(KernelShape {3, 3}),
)
);
-INSTANTIATE_TEST_CASE_P(simple_nchw_autopad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(simple_nchw_autopad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 64, 48}),
Values(KernelShape {3, 3}),
// NB: requires 1GB of RAM on device (e.g. ma2085 board)
// Stress test: large image with large depth, large kernel
-INSTANTIATE_TEST_CASE_P(i3d_id6, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(i3d_id6, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 79, 224, 224}),
Values(KernelShape {7, 7, 7}),
Values(Groups(1))));
// Like `i3d_id6` test but with smaller image (so must fit in Myriad X)
-INSTANTIATE_TEST_CASE_P(i3d_id6_shrink, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(i3d_id6_shrink, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 39, 112, 112}),
Values(KernelShape {7, 7, 7}),
Values(Groups(1))));
// Average-size image, trivial kernel 1x1x1
-INSTANTIATE_TEST_CASE_P(i3d_id12, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(i3d_id12, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 64, 40, 56, 56}),
Values(KernelShape {1, 1, 1}),
Values(Groups(1))));
// Average-size image, non-trivial kernel 3x3x3
-INSTANTIATE_TEST_CASE_P(i3d_id17, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(i3d_id17, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 64, 40, 56, 56}),
Values(KernelShape {3, 3, 3}),
Values(Groups(1))));
// Small image (7x7), trivial kernel
-INSTANTIATE_TEST_CASE_P(i3d_id249, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(i3d_id249, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 832, 10, 7, 7}),
Values(KernelShape {1, 1, 1}),
Values(Groups(1))));
// Small image (7x7), non-trivial kernel
-INSTANTIATE_TEST_CASE_P(i3d_id301, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(i3d_id301, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 48, 10, 7, 7}),
Values(KernelShape {3, 3, 3}),
Values(Groups(1))));
// Trivial image (1x1), trivial kernel
-INSTANTIATE_TEST_CASE_P(i3d_id314, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(i3d_id314, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 1024, 9, 1, 1}),
Values(KernelShape {1, 1, 1}),
}
};
-class myriadLayersConvNDTest_nightly: public ConvNDTest {};
+class myriadLayersConvNDTest_smoke: public ConvNDTest {};
-TEST_P(myriadLayersConvNDTest_nightly, ConvND) {
+TEST_P(myriadLayersConvNDTest_smoke, ConvND) {
testConvND();
}
#include "myriad_layers_convert_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsIOConvert_nightly,
+ accuracy, myriadLayersTestsIOConvert_smoke,
::testing::Combine(
::testing::ValuesIn(inputsDims),
- ::testing::ValuesIn(precisionsIO)
+ ::testing::ValuesIn(precisionsIO),
+ ::testing::Values("")
)
);
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsConvertWithFP16_nightly,
+ accuracy_customu8f16, myriadLayersTestsIOConvert_smoke,
+ ::testing::Combine(
+ ::testing::ValuesIn(inputsDims4D),
+ ::testing::Values(PrecisionPair{Precision::U8, Precision::FP16}),
+ ::testing::Values(s_CustomConfig)
+ )
+);
+
+INSTANTIATE_TEST_CASE_P(
+ accuracy, myriadLayersTestsConvertWithFP16_smoke,
::testing::Combine(
::testing::ValuesIn(inputsDims),
::testing::ValuesIn(withFP16Precisions)
using namespace InferenceEngine;
+PRETTY_PARAM(CustomConfig, std::string);
+
typedef std::pair<Precision, Precision> PrecisionPair;
-typedef std::tuple<InferenceEngine::SizeVector, PrecisionPair> ConvertIOTestParam;
+typedef std::tuple<SizeVector, PrecisionPair, CustomConfig> ConvertIOTestParam;
typedef std::tuple<InferenceEngine::SizeVector, Precision> ConvertWithFP16TestParam;
-class myriadLayersTestsIOConvert_nightly: public myriadLayersTests_nightly,
- public testing::WithParamInterface<ConvertIOTestParam> {
+static CustomConfig s_CustomConfig = {
+#ifdef VPU_HAS_CUSTOM_KERNELS
+ {getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"}
+#endif
};
-TEST_P(myriadLayersTestsIOConvert_nightly, TestsIOConvert)
-{
- const auto& param = ::testing::WithParamInterface<ConvertIOTestParam>::GetParam();
- const auto& inputDims = std::get<0>(param);
- const auto& precisions = std::get<1>(param);
- const auto& inputPrecision = precisions.first;
- const auto& outputPrecision = precisions.second;
+typedef myriadLayerTestBaseWithParam<ConvertIOTestParam> myriadLayersTestsIOConvert_smoke;
- SetInputTensors({inputDims});
- SetOutputTensors({inputDims});
+TEST_P(myriadLayersTestsIOConvert_smoke, TestsIOConvert) {
+ const SizeVector& dims = std::get<0>(GetParam());
+ const PrecisionPair& precision = std::get<1>(GetParam());
+ const std::string& customConfig = std::get<2>(GetParam());
+ const auto& inputPrecision = precision.first;
+ const auto& outputPrecision = precision.second;
- makeSingleLayerNetwork(LayerInitParams("Copy"),
- NetworkInitParams()
- .inputPrecision(inputPrecision)
- .outputPrecision(outputPrecision));
- ASSERT_TRUE(Infer());
+ if(!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
+ }
+ _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- auto tensorDesc = InferenceEngine::TensorDesc(
- outputPrecision, _outputMap.begin()->second->getTensorDesc().getDims(),
- _outputMap.begin()->second->getTensorDesc().getLayout());
- auto refBlob = make_blob_with_precision(outputPrecision, tensorDesc);
- refBlob->allocate();
+ _config[VPU_CONFIG_KEY(DISABLE_CONVERT_STAGES)] = CONFIG_VALUE(YES);
+
+ SetInputTensors({dims});
+ SetOutputTensors({dims});
+
+ std::map<std::string, std::string> params = {
+ {"precision", std::to_string(outputPrecision)},
+ {"scale", std::to_string(1.0)}, // scale and bias are needed for custom layer
+ {"bias", std::to_string(0.0)}
+ };
- ref_convert(_inputMap.begin()->second, refBlob);
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(
+ LayerInitParams("Convert")
+ .params(params)
+ .in({dims})
+ .out({dims})
+ .outPrecision(outputPrecision),
+ NetworkInitParams()
+ .layoutPreference(vpu::LayoutPreference::ChannelMajor)
+ .inputPrecision(inputPrecision)
+ .outputPrecision(outputPrecision)
+ .lockLayout(true)));
- CompareCommonAbsolute(_outputMap.begin()->second, refBlob, ERROR_BOUND);
+ ASSERT_TRUE(Infer());
+
+ ASSERT_NO_FATAL_FAILURE(ref_convert(_inputMap.begin()->second, _refBlob));
+
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, ERROR_BOUND);
}
-class myriadLayersTestsConvertWithFP16_nightly: public myriadLayersTests_nightly,
- public testing::WithParamInterface<ConvertWithFP16TestParam> {
-};
+typedef myriadLayerTestBaseWithParam<ConvertWithFP16TestParam> myriadLayersTestsConvertWithFP16_smoke;
-TEST_P(myriadLayersTestsConvertWithFP16_nightly, TestsConvertWithFP16)
+TEST_P(myriadLayersTestsConvertWithFP16_smoke, TestsConvertWithFP16)
{
const auto& param = ::testing::WithParamInterface<ConvertWithFP16TestParam>::GetParam();
const auto& inputDims = std::get<0>(param);
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND);
}
-std::vector<InferenceEngine::SizeVector> inputsDims = {
+std::vector<SizeVector> inputsDims = {
{ 224, 224 },
{ 3, 224, 224 },
{ 1, 1, 224, 224 },
{ 2, 2, 3, 224, 224 },
};
+std::vector<SizeVector> inputsDims4D = {
+ { 1, 1, 224, 224 },
+ { 1, 1, 416, 416 },
+ { 1, 1, 62, 62 },
+ { 1, 1, 227, 227 },
+ { 1, 3, 224, 224 },
+ { 1, 3, 360, 480 },
+};
+
std::vector<PrecisionPair> precisionsIO = {
{Precision::U8, Precision::FP16},
{Precision::FP32, Precision::FP16},
#include "myriad_layers_convolution1x1.hpp"
-INSTANTIATE_TEST_CASE_P(myriad, myriadConvolution1x1LayerTests_nightly,
+INSTANTIATE_TEST_CASE_P(myriad, myriadConvolution1x1LayerTests_smoke,
::testing::Combine(
::testing::Values(CONFIG_VALUE(NO)),
::testing::ValuesIn(s_isHWC),
PRETTY_PARAM(dimsConfig, dims_config);
PRETTY_PARAM(isHWC, int);
-typedef myriadLayerTestBaseWithParam<std::tuple<std::string, isHWC, dims_config>> myriadConvolution1x1LayerTests_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<std::string, isHWC, dims_config>> myriadConvolution1x1LayerTests_smoke;
void refConvolution1x1(const Blob::Ptr src, InferenceEngine::TBlob<uint8_t>::Ptr weights, Blob::Ptr dst, int isHWC) {
ie_fp16 *in = static_cast<ie_fp16*>(src->buffer());
size_t IW = in_width;
size_t IH = in_height;
size_t IC = in_channels;
-
+
const auto& out_dims = dst->getTensorDesc().getDims();
size_t out_width = out_dims[out_dims.size() - 1];
size_t out_height = out_dims[out_dims.size() - 2];
continue;
}
uint32_t indx;
- if(isHWC == 1){
- indx = ic + iw * IC + ih * IC * IW;
+ if (isHWC == 1) {
+ indx = ic + iw * IC + ih * IC * IW;
valYXZ = (valYXZ) + (PrecisionUtils::f16tof32(in[indx]) * PrecisionUtils::f16tof32(w[oc*IC + ic]));
}
- else {
+ else {
indx = iw + ih * IW + ic * IW * IH;
valZYX = PrecisionUtils::f32tof16(PrecisionUtils::f16tof32(valZYX) + PrecisionUtils::f16tof32(PrecisionUtils::f32tof16(PrecisionUtils::f16tof32(in[indx]) * PrecisionUtils::f16tof32(w[oc*IC + ic]))));
}
}
- if(isHWC == 1){
+ if (isHWC == 1) {
out[oc*OH*OW + oh*OW + ow] = PrecisionUtils::f32tof16(valYXZ);
}
else {
}
}
-TEST_P(myriadConvolution1x1LayerTests_nightly, Convolution1x1) {
+TEST_P(myriadConvolution1x1LayerTests_smoke, Convolution1x1) {
std::string model = R"V0G0N(
<net name="Convolution1x1" version="2" batch="1">
<layers>
</output>
</layer>
<layer id="2" name="conv1x1" precision="FP16" type="Convolution">
- <data isHWC="@isHWC@" stride-x="1" stride-y="1" pad-x="0" pad-y="0" kernel-x="1" kernel-y="1" output="48" group="1"/>
+ <data stride="1,1" pad="0,0" kernel="1,1" dilation="1,1" output="48" group="1"/>
<input>
<port id="0">
<dim>@IB@</dim>
std::string HWConfigValue = std::get<0>(GetParam());
int isHWC = std::get<1>(GetParam());
dims_config customConfig = std::get<2>(GetParam());
+ const auto layout = isHWC ? Layout::NHWC : Layout::NCHW;
if(!customConfig.custom_config.empty() && !CheckMyriadX()) {
GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
size_t num_weights = IC * OC;
- model.replace( model.find("@isHWC@"), sizeof("@isHWC@") -1, std::to_string(isHWC));
-
model.replace( model.find("@IB@"), sizeof("@IB@") -1, std::to_string(IB));
model.replace( model.find("@IB@"), sizeof("@IB@") -1, std::to_string(IB));
model.replace( model.find("@IC@"), sizeof("@IC@") -1, std::to_string(IC));
model.replace( model.find("@size_weights@"), sizeof("@size_weights@") -1, std::to_string(num_weights * sizeof(ie_fp16)));
InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(GenWeights(num_weights));
-
+
StatusCode st;
InferenceEngine::Core ie;
_inputsInfo = network.getInputsInfo();
_inputsInfo["data"]->setPrecision(Precision::FP16);
- (isHWC) ? _inputsInfo["data"]->setLayout(NHWC) : _inputsInfo["data"]->setLayout(NCHW);
+ _inputsInfo["data"]->setLayout(layout);
_outputsInfo = network.getOutputsInfo();
_outputsInfo["conv1x1"]->setPrecision(Precision::FP16);
- _outputsInfo["conv1x1"]->setLayout(NCHW);
+ _outputsInfo["conv1x1"]->setLayout(layout);
ASSERT_NO_THROW(st = _vpuPluginPtr->LoadNetwork(_exeNetwork, network,
{{VPU_CONFIG_KEY(CUSTOM_LAYERS), customConfig.custom_config}, {VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), HWConfigValue}}, &_resp));
#ifdef VPU_HAS_CUSTOM_KERNELS
{0, 1}
#endif
-};
\ No newline at end of file
+};
#include "myriad_layers_convolution3x3.hpp"
-INSTANTIATE_TEST_CASE_P(myriad, myriadConvolution3x3LayerTests_nightly,
+INSTANTIATE_TEST_CASE_P(myriad, myriadConvolution3x3LayerTests_smoke,
::testing::Combine(
::testing::Values(CONFIG_VALUE(NO)),
::testing::ValuesIn(s_DimsConfig)));
PRETTY_PARAM(hwAcceleration, std::string);
PRETTY_PARAM(dimsConfig, dims_config_con3x3);
-typedef myriadLayerTestBaseWithParam<std::tuple<std::string, dims_config_con3x3>> myriadConvolution3x3LayerTests_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<std::string, dims_config_con3x3>> myriadConvolution3x3LayerTests_smoke;
void refConvolution3x3(const Blob::Ptr src, InferenceEngine::TBlob<uint8_t>::Ptr weights, Blob::Ptr dst, int stride_x, int stride_y, int pad_x, int pad_y, int dilation_x, int dilation_y) {
}
}
-TEST_P(myriadConvolution3x3LayerTests_nightly, Convolution3x3) {
+TEST_P(myriadConvolution3x3LayerTests_smoke, Convolution3x3) {
std::string model = R"V0G0N(
<net name="Convolution3x3" version="2" batch="1">
<layers>
</output>
</layer>
<layer id="2" name="conv3x3" precision="FP16" type="Convolution">
- <data stride-x="@stride-x@" stride-y="@stride-y@" pad-x="1" pad-y="1" dilation-x="1" dilation-y="1" output="1" kernel-x="3" kernel-y="3"/>
+ <data stride="@stride-x@,@stride-y@" pads_begin="1,1" pads_end="1,1" dilation="1,1" output="1" kernel="3,3"/>
<input>
<port id="0">
<dim>@IB@</dim>
#include "myriad_layers_convolution_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy_chw_dilation, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_chw_dilation, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 37, 43, 43)
, MAKE_STRUCT(tensor_test_params, 1, 37, 19, 19))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayers_IR3_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayers_IR3_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 3, 32, 24})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_0, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_0, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 5, 1, 1})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_1, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_1, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 576, 7, 7})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_2, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_2, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 128, 7, 7})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_3, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_3, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 4, 7, 7})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_4, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_4, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 256, 7, 7})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_5, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_5, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 1024, 4, 4})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_6, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_6, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 192, 4, 4})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_7, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_7, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 160, 4, 4})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_8, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_8, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 224, 4, 4})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_9, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_9, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 1024, 4, 4})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_10, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_10, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 64, 56, 56})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_11, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_11, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 192, 7, 7})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_12, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_12, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 576, 7, 7})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_1, myriadLayers_BatchTest2_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_1, myriadLayers_BatchTest2_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 576, 7, 7})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_3X3, myriadLayers_IR3_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3X3, myriadLayers_IR3_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 3, 32, 24})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_3X1, myriadLayers_IR3_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3X1, myriadLayers_IR3_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 3, 32, 24})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_1X3, myriadLayers_IR3_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_1X3, myriadLayers_IR3_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 4, 16, 16})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayers_3X3X3_ConstInput_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayers_3X3X3_ConstInput_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 3, 10, 10})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_crossroad_spatialConv, myriadLayerConvolutionTensorFlow_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_crossroad_spatialConv, myriadLayerConvolutionTensorFlow_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 3, 1024, 1024))
, ::testing::Values<DimsOutput>(MAKE_STRUCT(tensor_test_params, 1, 3, 512, 512))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_inception_v2, myriadLayerConvolutionTensorFlow_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_inception_v2, myriadLayerConvolutionTensorFlow_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 16, 28, 28))
, ::testing::Values<DimsOutput>(MAKE_STRUCT(tensor_test_params, 1, 64, 14, 14))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_inception_v1, myriadLayerConvolutionTensorFlow_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_inception_v1, myriadLayerConvolutionTensorFlow_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 3, 224, 224),
MAKE_STRUCT(tensor_test_params, 1, 32, 224, 224)
)
);
-INSTANTIATE_TEST_CASE_P(test_3x3_SSD_dilation, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(test_3x3_SSD_dilation, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 512, 19, 19))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(test_TF_Resnet_50, myriadLayers_IR3_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(test_TF_Resnet_50, myriadLayers_IR3_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 38, 38})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(test_3x3_icvnet_dilation, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(test_3x3_icvnet_dilation, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 24, 20, 20))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(test_5x5_with_dilation, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(test_5x5_with_dilation, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 32, 64, 77))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 5, 5))
)
);
-INSTANTIATE_TEST_CASE_P(test_7x7_with_dilation, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(test_7x7_with_dilation, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 32, 64, 77))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 7, 7))
);
-INSTANTIATE_TEST_CASE_P(test_conv1x1, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(test_conv1x1, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 10, 13, 13))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(test_yolo_tiny_2_512x13x13_use_3x3_convolution, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(test_yolo_tiny_2_512x13x13_use_3x3_convolution, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 512, 13, 13))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(test_yolo_tiny_2_512x13x13_use_1x1_convolution, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(test_yolo_tiny_2_512x13x13_use_1x1_convolution, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 4608, 13, 13))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_group, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_group, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 32, 64, 77)
, MAKE_STRUCT(tensor_test_params, 1, 32, 112, 96))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_group_large_input, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_group_large_input, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 32, 192, 336))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_any_group, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_any_group, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 32, 64, 77))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(set_optimization_for_3x3_with_group, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(set_optimization_for_3x3_with_group, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 24, 80, 80)
, MAKE_STRUCT(tensor_test_params, 1, 36, 80, 80))
)
);
-INSTANTIATE_TEST_CASE_P(set_optimization_for_3x3s1, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(set_optimization_for_3x3s1, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 24, 80, 80))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_1x1, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_1x1, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 16, 64, 64)
, MAKE_STRUCT(tensor_test_params, 1, 32, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_3x3, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3x3, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 8, 16, 16)
, MAKE_STRUCT(tensor_test_params, 1, 8, 59, 73))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_1x3, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_1x3, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 8, 59, 73))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 3), MAKE_STRUCT(param_size, 3, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_5x5, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_5x5, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 16, 32, 32)
/*, MAKE_STRUCT(tensor_test_params, 1, 8, 511, 399) failed*/)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_7x7, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_7x7, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 8, 32, 32)
/*, MAKE_STRUCT(tensor_test_params, 1, 8, 511, 399) failed*/)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_3x3_large_input_1, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3x3_large_input_1, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 3, 720, 1280))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_3x3_large_input_2, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3x3_large_input_2, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 24, 357, 637))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
);
-INSTANTIATE_TEST_CASE_P(accuracy_3x3_large_input_3, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3x3_large_input_3, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 16, 359, 639))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_1x1_large_input, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_1x1_large_input, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 24, 355, 635))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_small_input_0, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_small_input_0, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 128, 38, 38))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
, ::testing::Values<layoutPreference>(vpu::LayoutPreference::ChannelMinor)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_small_input_1, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_small_input_1, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 256, 2, 3))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
, ::testing::Values<layoutPreference>(vpu::LayoutPreference::ChannelMinor)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_small_input_2, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_small_input_2, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 256, 2, 2))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
, ::testing::Values<layoutPreference>(vpu::LayoutPreference::ChannelMinor)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_small_input_3, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_small_input_3, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 256, 1, 1))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
PRETTY_PARAM(layoutPreference, vpu::LayoutPreference);
typedef myriadLayerTestBaseWithParam<tuple<DimsInput, kernel, stride, pad
- , out_channels, group, dilation_factor, layoutPreference >> myriadLayerConvolution_nightly;
+ , out_channels, group, dilation_factor, layoutPreference >> myriadLayerConvolution_smoke;
typedef myriadLayerTestBaseWithParam<tuple<DimsInput, DimsOutput, kernel, stride, pad
- , group, dilation_factor, layoutPreference >> myriadLayerConvolutionTensorFlow_nightly;
+ , group, dilation_factor, layoutPreference >> myriadLayerConvolutionTensorFlow_smoke;
-TEST_P(myriadLayerConvolution_nightly, Convolution) {
+TEST_P(myriadLayerConvolution_smoke, Convolution) {
tensor_test_params input_dims = get<0>(GetParam());
param_size kernel = get<1>(GetParam());
param_size stride = get<2>(GetParam());
CompareCommonAbsolute(outputBlob, _refBlob, maxerr);
}
-TEST_P(myriadLayerConvolutionTensorFlow_nightly, Convolution) {
+TEST_P(myriadLayerConvolutionTensorFlow_smoke, Convolution) {
tensor_test_params input_dims = get<0>(GetParam());
tensor_test_params output_dims = get<1>(GetParam());
param_size kernel = get<2>(GetParam());
}
}
-class myriadLayers_3X3X3_ConstInput_nightly: public ConvolutionTest<vpu::LayoutPreference>{
+class myriadLayers_3X3X3_ConstInput_smoke: public ConvolutionTest<vpu::LayoutPreference>{
};
-TEST_P(myriadLayers_3X3X3_ConstInput_nightly, Convolution) {
+TEST_P(myriadLayers_3X3X3_ConstInput_smoke, Convolution) {
auto p = ::testing::WithParamInterface<std::tuple<InferenceEngine::SizeVector, param_size, param_size, param_size, uint32_t, uint32_t, vpu::LayoutPreference>>::GetParam();
const auto layoutPreference = std::get<6>(p);
/* IR version 3 tests, main difference is a changes in padding parameters definitions */
typedef std::tuple<InferenceEngine::SizeVector, param_size, param_size, param_size, param_size, uint32_t, uint32_t> IR3_params;
-class myriadLayers_IR3_ConvTests_nightly: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
+class myriadLayers_IR3_ConvTests_smoke: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
public testing::WithParamInterface<IR3_params> {
};
-TEST_P(myriadLayers_IR3_ConvTests_nightly, Conv) {
+TEST_P(myriadLayers_IR3_ConvTests_smoke, Conv) {
std::map<std::string, std::string> params;
InferenceEngine::SizeVector output_tensor;
int32_t IW = 0;
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), maxerr);
}
-class myriadLayers_BatchTest_ConvTests_nightly: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
+class myriadLayers_BatchTest_ConvTests_smoke: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
public testing::WithParamInterface<IR3_params> {
};
-class myriadLayers_BatchTest2_ConvTests_nightly: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
+class myriadLayers_BatchTest2_ConvTests_smoke: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
public testing::WithParamInterface<IR3_params> {
};
}
}
-TEST_P(myriadLayers_BatchTest_ConvTests_nightly, Conv) {
+TEST_P(myriadLayers_BatchTest_ConvTests_smoke, Conv) {
std::map<std::string, std::string> params;
InferenceEngine::SizeVector output_tensor;
int32_t IW = 0;
</net>
)V0G0N";
-TEST_P(myriadLayers_BatchTest2_ConvTests_nightly, Conv) {
+TEST_P(myriadLayers_BatchTest2_ConvTests_smoke, Conv) {
std::map<std::string, std::string> params;
InferenceEngine::SizeVector output_tensor;
int32_t IW = 0;
#include "myriad_layers_copy_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerCopy_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerCopy_smoke,
::testing::Combine(
::testing::Values<NDims>(MAKE_STRUCT(nd_tensor_test_params, {36, 19, 20, 21})
, MAKE_STRUCT(nd_tensor_test_params, {7, 8, 5, 12})
PRETTY_PARAM(NDims, nd_tensor_test_params);
-typedef myriadLayerTestBaseWithParam<tuple<NDims, int>> myriadLayerCopy_nightly;
+typedef myriadLayerTestBaseWithParam<tuple<NDims, int>> myriadLayerCopy_smoke;
-TEST_P(myriadLayerCopy_nightly, Copy) {
+TEST_P(myriadLayerCopy_smoke, Copy) {
nd_tensor_test_params input_dims = get<0>(GetParam());
int ndims = get<1>(GetParam());
#include "myriad_layers_crop_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy_Crop, myriadLayerCropOneInputAndDim_nightly,
+ accuracy_Crop, myriadLayerCropOneInputAndDim_smoke,
::testing::Combine(
::testing::ValuesIn(s_tileTensors1),
::testing::ValuesIn(s_tileTensors2),
);
INSTANTIATE_TEST_CASE_P(
- accuracy_Crop1, myriadLayerCropOneInput_nightly,
+ accuracy_Crop1, myriadLayerCropOneInput_smoke,
::testing::Combine(
::testing::ValuesIn(s_tileTensors1),
::testing::ValuesIn(s_tileTensors2),
);
INSTANTIATE_TEST_CASE_P(
- accuracy_Crop2, myriadLayerCropTwoInputs_nightly,
+ accuracy_Crop2, myriadLayerCropTwoInputs_smoke,
::testing::Combine(
::testing::ValuesIn(s_tileTensors1),
::testing::ValuesIn(s_tileTensors2),
PRETTY_PARAM(crop_begin, InferenceEngine::SizeVector)
PRETTY_PARAM(crop_end, InferenceEngine::SizeVector)
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dims, crop_axis, offset, dim >> myriadLayerCropOneInputAndDim_nightly;
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dims, crop_axis, crop_begin, crop_end >> myriadLayerCropOneInput_nightly;
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dims, Dims, crop_axis, offset >> myriadLayerCropTwoInputs_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dims, crop_axis, offset, dim >> myriadLayerCropOneInputAndDim_smoke;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dims, crop_axis, crop_begin, crop_end >> myriadLayerCropOneInput_smoke;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dims, Dims, crop_axis, offset >> myriadLayerCropTwoInputs_smoke;
static void ref_crop(const Blob::Ptr src,
Blob::Ptr dst,
}
}
-TEST_P(myriadLayerCropOneInputAndDim_nightly, CropWithOneInputAndDim) {
+TEST_P(myriadLayerCropOneInputAndDim_smoke, CropWithOneInputAndDim) {
auto param = GetParam();
tensor_test_params tensor1 = std::get<0>(param);
tensor_test_params tensor2 = std::get<1>(param);
CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, ERROR_BOUND);
}
-TEST_P(myriadLayerCropOneInput_nightly, CropWithOneInput) {
+TEST_P(myriadLayerCropOneInput_smoke, CropWithOneInput) {
auto param = GetParam();
tensor_test_params tensor1 = std::get<0>(param);
tensor_test_params tensor2 = std::get<1>(param);
CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, ERROR_BOUND);
}
-TEST_P(myriadLayerCropTwoInputs_nightly, CropWithTwoInputs) {
+TEST_P(myriadLayerCropTwoInputs_smoke, CropWithTwoInputs) {
auto param = GetParam();
tensor_test_params tensor1 = std::get<0>(param);
tensor_test_params tensor2 = std::get<1>(param);
#include "myriad_layers_custom_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsShuffleChannel_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsShuffleChannel_smoke,
::testing::Combine(
::testing::ValuesIn(s_ShuffleChannelTensors),
::testing::ValuesIn(s_ShuffleChannelGroup),
::testing::ValuesIn(s_CustomConfig)));
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsQuantize_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsFakeQuantize_smoke,
::testing::Combine(
::testing::ValuesIn(s_QuantizeTensors),
::testing::ValuesIn(s_QuantizeLevels),
+ ::testing::Values(IRVersion::v7, IRVersion::v10),
::testing::ValuesIn(s_CustomConfig)));
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsQuantizeBinarize_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsQuantizeBinarize_smoke,
::testing::Combine(
::testing::ValuesIn(s_QuantizeTensors),
::testing::ValuesIn(s_QuantizeLevels),
::testing::ValuesIn(s_QuantizeSwitchOut),
::testing::ValuesIn(s_CustomConfig)));
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsBinaryConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsBinaryConvolution_smoke,
::testing::Combine(
::testing::ValuesIn(s_BinaryConvolutionTensors),
::testing::ValuesIn(s_BinaryConvolutionDilations),
::testing::ValuesIn(s_BinaryConvolutionStrides),
::testing::ValuesIn(s_CustomConfig)));
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExperimentalDetectronPriorGridGenerator_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExperimentalDetectronPriorGridGenerator_smoke,
::testing::Combine(
::testing::ValuesIn(s_ExperimentalDetectronPriorGridGeneratorImageDims),
::testing::ValuesIn(s_CustomConfig)));
+
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsCorrelate_smoke,
+ ::testing::Combine(
+ ::testing::ValuesIn(s_CorrelateParams),
+ ::testing::ValuesIn(s_CustomConfig)));
+
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSpatialTransform_smoke,
+ ::testing::Combine(
+ ::testing::ValuesIn(s_SpatialTransformInputs),
+ ::testing::ValuesIn(s_SpatialTransformTheta),
+ ::testing::ValuesIn(s_CustomConfig)));
// SPDX-License-Identifier: Apache-2.0
//
+#pragma once
+
#include "myriad_layers_tests.hpp"
+#include <vector>
+#include <array>
+#include <algorithm>
using namespace InferenceEngine;
int32_t W = 1;
int32_t H = 1;
int32_t C = 1;
+
get_dims(src, W, H, C);
for (int c = 0; c < C; c++) {
float olow = PrecisionUtils::f16tof32(output_low->size() == 1 ? output_low_data[0] : output_low_data[c]);
float ohigh = PrecisionUtils::f16tof32(output_high->size() == 1 ? output_high_data[0] : output_high_data[c]);
+ // emulate half math to be close to half float SHAVE implementation
+ float a = PrecisionUtils::f16tof32(PrecisionUtils::f32tof16((float)(levels - 1) / (ihigh - ilow)));
+ float b = PrecisionUtils::f16tof32(PrecisionUtils::f32tof16((ohigh - olow) / (float)(levels - 1)));
+
for (int h = 0; h < H; h++) {
for (int w = 0; w < W; w++) {
int idx = (isCHW) ? c*W*H + h*W + w : c + h*W*C + w*C;
dst_val = olow;
} else if (src_val > ihigh) {
dst_val = ohigh;
- } else {
- dst_val = round((src_val - ilow) * ((float)(levels - 1) / (ihigh - ilow))) * ((ohigh - olow) / (float)(levels - 1))+ olow;
- //dst_val = round((src_val - ilow) / (ihigh - ilow) * (levels - 1)) / (levels - 1) * (ohigh - olow) + olow;
+ } else {
+ if(!(ihigh - ilow) || !(levels - 1)) {
+ dst_val = olow;
+ } else {
+ // quantization pass
+ float quantized = PrecisionUtils::f16tof32(PrecisionUtils::f32tof16((src_val - ilow) * a));
+ // de-quantization pass
+ dst_val = PrecisionUtils::f16tof32(PrecisionUtils::f32tof16(roundf(quantized) * b)) + olow;
+ }
}
dst_data[idx] = PrecisionUtils::f32tof16(dst_val);
}
}
}
+
+// Repacks a single image from channel-major (CHW) layout 'in' into a
+// channel-minor (HWC) buffer 'out' that has a spatial border of 'padding'
+// pixels on every side. Border cells are never written here, so callers must
+// pass a zero-initialized buffer. 'widthheight' must equal width * height.
+// NOTE(review): 'height' and 'pwidthheight' are accepted for signature parity
+// with the original Caffe kernel but are unused — TODO confirm.
+static void rearrange(const ie_fp16* in, ie_fp16* out, int num, int channels, int width, int height,
+ int widthheight, int padding, int pwidthheight)
+{
+ (void) height;
+ (void) pwidthheight;
+
+ ASSERT_TRUE(num == 1) << "batch is not supported for Myriad";
+
+ for (int xy = 0; xy < widthheight; xy++)
+ {
+ for (int ch = 0; ch < channels; ch++)
+ {
+ ie_fp16 value = in[ch * widthheight + xy];
+
+ // Shift the flat spatial index into the padded coordinate system and
+ // linearize it with the padded row pitch (width + 2 * padding).
+ int xpad = (xy % width + padding);
+ int ypad = (xy / width + padding);
+ int xypad = ypad * (width + 2 * padding) + xpad;
+
+ out[xypad * channels + ch] = value;
+ }
+ }
+}
+
+// Reference correlation (multiply type): for each output position, compares a
+// kernel_size x kernel_size patch of 'bottom0' against displaced patches of
+// 'bottom1' inside the neighborhood grid, emitting one output channel per
+// displacement. Inputs must be in the padded channel-minor layout produced by
+// rearrange(); each result is normalized by the patch element count.
+static void correlate(int nthreads, int num, int topwidth, int topheight, int topchannels, int topcount,
+ int max_displacement, int neighborhood_grid_radius, int neighborhood_grid_width,
+ int kernel_radius, int kernel_size, int stride1, int stride2,
+ int bottomwidth, int bottomheight, int bottomchannels,
+ const ie_fp16* bottom0, const ie_fp16* bottom1, ie_fp16* top)
+{
+ (void) nthreads;
+ (void) kernel_radius;
+ (void) topcount;
+ (void) bottomheight;
+ (void) num;
+
+ // Elements in one comparison patch; used as the normalization divisor below.
+ const int sumelems = kernel_size * kernel_size * bottomchannels;
+
+ auto patch_data = std::vector<ie_fp16>(sumelems);
+
+ for (int blockIdx_y = 0; blockIdx_y < topheight; blockIdx_y++)
+ {
+ for (int blockIdx_x = 0; blockIdx_x < topwidth; blockIdx_x++)
+ {
+ // Top-left corner of the reference patch in the padded bottom0.
+ int x1 = blockIdx_x * stride1 + max_displacement;
+ int y1 = blockIdx_y * stride1 + max_displacement;
+ // Load 3D patch into shared memory
+ for (int j = 0; j < kernel_size; j++)
+ {
+ for (int i = 0; i < kernel_size; i++)
+ {
+ int idx1 = ( j * kernel_size + i) * bottomchannels;
+ int idx2 = ((y1 + j) * bottomwidth + x1 + i) * bottomchannels;
+
+ for (int ch = 0; ch < bottomchannels; ch++)
+ patch_data[idx1 + ch] = bottom0[idx2 + ch];
+ }
+ }
+
+ for (int top_channel = 0; top_channel < topchannels; top_channel++)
+ {
+ // Pixel displacement encoded by this output channel (row-major walk
+ // over the (2r+1) x (2r+1) neighborhood grid, scaled by stride2).
+ int x2 = x1 + (top_channel % neighborhood_grid_width - neighborhood_grid_radius) * stride2;
+ int y2 = y1 + (top_channel / neighborhood_grid_width - neighborhood_grid_radius) * stride2;
+
+ // Accumulate in fp32 to limit rounding error of the fp16 products.
+ float sum = (0.0f);
+ for (int j = 0; j < kernel_size; j++)
+ {
+ for (int i = 0; i < kernel_size; i++)
+ {
+ int idx1 = ( j * kernel_size + i) * bottomchannels;
+ int idx2 = ((y2 + j) * bottomwidth + x2 + i) * bottomchannels;
+
+ for (int ch = 0; ch < bottomchannels; ch++)
+ sum += PrecisionUtils::f16tof32(patch_data[idx1 + ch]) * PrecisionUtils::f16tof32(bottom1[idx2 + ch]);
+ }
+ }
+ // Output is CHW: displacement channel is the outer dimension.
+ top[top_channel * topheight * topwidth + blockIdx_y * topwidth + blockIdx_x]
+ = PrecisionUtils::f32tof16(sum / (float)sumelems);
+ }
+ }
+ }
+}
+
+// CPU reference for the Correlate layer: validates blob shapes, repacks both
+// inputs into padded channel-minor buffers via rearrange(), then runs
+// correlate(). Fails the enclosing test (gtest ASSERT) on any shape mismatch.
+static void refCorrelate(const Blob::Ptr in0,
+ const Blob::Ptr in1,
+ Blob::Ptr out,
+ int kernel_size, int max_displacement, int pad_size,
+ int stride1, int stride2) {
+ // Correlation type = MULTIPLY
+ ASSERT_NE(in0, nullptr);
+ ASSERT_NE(in1, nullptr);
+ ASSERT_NE(out, nullptr);
+
+ const ie_fp16 *in0_data = in0->buffer();
+ const ie_fp16 *in1_data = in1->buffer();
+ ie_fp16 *out_data = out->buffer();
+ ASSERT_NE(in0_data, nullptr);
+ ASSERT_NE(in1_data, nullptr);
+ ASSERT_NE(out_data, nullptr);
+
+ int32_t IW0 = 1;
+ int32_t IH0 = 1;
+ int32_t IC0 = 1;
+ get_dims(in0, IW0, IH0, IC0);
+ int32_t IW1 = 1;
+ int32_t IH1 = 1;
+ int32_t IC1 = 1;
+ get_dims(in1, IW1, IH1, IC1);
+ ASSERT_EQ(IW0, IW1);
+ ASSERT_EQ(IH0, IH1);
+ ASSERT_EQ(IC0, IC1);
+
+ int32_t OW = 1;
+ int32_t OH = 1;
+ int32_t OC = 1;
+ get_dims(out, OW, OH, OC);
+
+ const int bottomchannels = IC0;
+
+ const int paddedbottomwidth = IW0 + 2 * pad_size;
+ const int paddedbottomheight = IH0 + 2 * pad_size;
+
+ const int kernel_radius = kernel_size / 2; //half of the comparison window
+ const int border_size = max_displacement + kernel_radius; //size of unreachable border region (on each side)
+
+ const int top_width = (int)ceilf((float)(paddedbottomwidth - border_size * 2) / (float)stride1);
+ const int top_height = (int)ceilf((float)(paddedbottomheight - border_size * 2) / (float)stride1);
+
+ ASSERT_TRUE(top_width >= 1 && top_height >= 1)
+ << "Correlation cannot be done with current settings. Neighborhood and kernel don't fit in blob";
+
+ // Given a center position in image 1,
+ // how many displaced positions in -x / +x direction do we consider in image 2 (neighborhoodGridWidth):
+ const int neighborhood_grid_radius = max_displacement / stride2;
+ const int neighborhood_grid_width = 2 * neighborhood_grid_radius + 1;
+
+ const int top_channels = neighborhood_grid_width * neighborhood_grid_width;
+
+ ASSERT_TRUE(OC == top_channels && OH == top_height && OW == top_width)
+ << "input and output blobs have incompatible shapes";
+
+ // std::vector value-initializes, so the padded borders rearrange() never
+ // touches stay zero.
+ auto rbot1 = std::vector<ie_fp16>(paddedbottomheight * paddedbottomwidth * bottomchannels);
+ auto rbot2 = std::vector<ie_fp16>(paddedbottomheight * paddedbottomwidth * bottomchannels);
+
+ const int bnum = 1;
+ const int topcount = top_width * top_height * top_channels;
+
+ const int pwidthheight = (IW0 + 2 * pad_size) * (IH0 + 2 * pad_size);
+
+ rearrange(in0_data, rbot1.data(), bnum, IC0, IW0, IH0, IW0 * IH0, pad_size, pwidthheight);
+ rearrange(in1_data, rbot2.data(), bnum, IC0, IW0, IH0, IW0 * IH0, pad_size, pwidthheight);
+
+ const int height = IH0 + 2 * pad_size;
+ const int width = IW0 + 2 * pad_size;
+ correlate(topcount, bnum, top_width, top_height, top_channels, topcount,
+ max_displacement, neighborhood_grid_radius, neighborhood_grid_width,
+ kernel_radius, kernel_size, stride1, stride2, width, height, IC0,
+ rbot1.data(), rbot2.data(), out_data);
+}
+
+// Bilinearly samples the H x W fp16 image 'pic' at normalized coordinates
+// (px, py) in [-1, 1]; px selects the row, py the column. The four explicit
+// if-blocks are the four taps of standard bilinear interpolation; taps that
+// fall outside the image contribute zero.
+// NOTE(review): although declared to return float, the final line converts
+// the result to fp16 and implicitly widens the raw half bits back to float;
+// the caller (refSpatialTransform) then narrows that value straight into an
+// ie_fp16 element, so the bit pattern round-trips correctly (all uint16
+// values are exact in float). Ugly, but behaviorally sound — keep in mind if
+// ever calling this from new code.
+static float transform_forward_cpu(const ie_fp16* pic, const float px, const float py, int W, int H) {
+ float res = 0.0f;
+ // Map normalized [-1, 1] coordinates to pixel space.
+ float x = (px + 1) / 2 * H;
+ float y = (py + 1) / 2 * W;
+ int m, n, k, l;
+ float w;
+ // Tap 1: (floor(x), floor(y)). k/l index the pixel; m/n hold the same
+ // values as floats' integer part for the weight computation.
+ k = (floorf(x));
+ l = (floorf(y));
+ m = floorf(x);
+ n = floorf(y);
+ w = 0;
+
+ if (k >= 0 && k < H && l >= 0 && l < W) {
+ w = fmaxf(0.0f, 1 - fabsf(x - m)) * fmaxf(0.0f, 1 - fabsf(y - n));
+ res += w * PrecisionUtils::f16tof32(pic[k * W + l]);
+ }
+
+ // Tap 2: (floor(x) + 1, floor(y)).
+ k = (floorf(x) + 1);
+ l = (floorf(y));
+ m = floorf(x) + 1;
+ n = floorf(y);
+
+ w = 0;
+ if (k >= 0 && k < H && l >= 0 && l < W) {
+ w = fmaxf(0.0f, 1 - fabsf(x - m)) * fmaxf(0.0f, 1 - fabsf(y - n));
+ res += w * PrecisionUtils::f16tof32(pic[k * W + l]);
+ }
+ // Tap 3: (floor(x), floor(y) + 1).
+ k = (floorf(x));
+ l = (floorf(y) + 1);
+ m = floorf(x);
+ n = floorf(y) + 1;
+ w = 0;
+ if (k >= 0 && k < H && l >= 0 && l < W) {
+ w = fmaxf(0.0f, 1 - fabsf(x - m)) * fmaxf(0.0f, 1 - fabsf(y - n));
+ res += w * PrecisionUtils::f16tof32(pic[k * W + l]);
+ }
+ // Tap 4: (floor(x) + 1, floor(y) + 1).
+ k = (floorf(x) + 1);
+ l = (floorf(y) + 1);
+ m = floorf(x) + 1;
+ n = floorf(y) + 1;
+ w = 0;
+
+ if (k >= 0 && k < H && l >= 0 && l < W) {
+ w = fmaxf(0.0f, 1 - fabsf(x - m)) * fmaxf(0.0f, 1 - fabsf(y - n));
+ res += w * PrecisionUtils::f16tof32(pic[k * W + l]);
+ }
+
+ return PrecisionUtils::f32tof16(res);
+}
+
+// C = A * B, or C = A * B^T when 'transposeB' is non-zero.
+// A is m x k; B is k x n (stored n x k when transposed); C is m x n and is
+// fully overwritten. Plain triple loop — adequate for the tiny matrices used
+// by refSpatialTransform (k = 3, n = 2).
+static void matrixMult(const std::vector<float>& A, const std::vector<float>& B, std::vector<float>& C,
+ const int m, const int n, const int k, const int transposeB) {
+ if (transposeB) {
+ for (int rowA = 0; rowA < m; rowA++) {
+ for (int rowB = 0; rowB < n; rowB++) {
+ float sum = 0;
+ for (int colA = 0; colA < k; colA++) {
+ sum += A[rowA * k + colA] * B[rowB * k + colA];
+ }
+ C[rowA * n + rowB] = sum;
+ }
+ }
+ } else {
+ for (int rowA = 0; rowA < m; rowA++) {
+ for (int colB = 0; colB < n; colB++) {
+ float sum = 0;
+ for (int colA = 0; colA < k; colA++) {
+ sum += A[rowA * k + colA] * B[colA * n + colB];
+ }
+ C[rowA * n + colB] = sum;
+ }
+ }
+ }
+}
+
+// CPU reference for the SpatialTransform layer: builds a normalized output
+// sampling grid, maps it through the 2x3 affine matrix 'theta', and
+// bilinearly samples 'src' (NCHW, batch 1 assumed) into 'dst', channel by
+// channel.
+static void refSpatialTransform(const Blob::Ptr& src, const Blob::Ptr& theta, Blob::Ptr dst) {
+ ASSERT_NE(src, nullptr);
+ ASSERT_NE(theta, nullptr);
+ ASSERT_NE(dst, nullptr);
+
+ const ie_fp16 *src_data = src->buffer();
+ const ie_fp16 *theta_data = theta->buffer();
+ ie_fp16 *dst_data = dst->buffer();
+ ASSERT_NE(src_data, nullptr);
+ ASSERT_NE(theta_data, nullptr);
+ ASSERT_NE(dst_data, nullptr);
+ ASSERT_EQ(theta->size(), 6);
+
+ int C = src->getTensorDesc().getDims()[1];
+ int H = src->getTensorDesc().getDims()[2];
+ int W = src->getTensorDesc().getDims()[3];
+
+ // input_grid_data: (x, y) source coordinates per output pixel;
+ // output_grid_data: homogeneous (row_norm, col_norm, 1) per output pixel.
+ auto input_grid_data = std::vector<float>(2*H*W);
+ auto output_grid_data = std::vector<float>(3*H*W);
+ auto theta_float = std::vector<float>(6);
+ for (size_t i = 0; i < 6; i++) {
+ theta_float[i] = PrecisionUtils::f16tof32(theta_data[i]);
+ }
+
+ for (int i = 0; i < H * W; ++i) {
+ output_grid_data[3 * i] = ((i / W) * 1.0f / H * 2.0f - 1.0f);
+ output_grid_data[3 * i + 1] = ((i % W) * 1.0f / W * 2.0f - 1.0f);
+ output_grid_data[3 * i + 2] = 1.0f;
+ }
+ // Actually execute
+ int M_size = H * W;
+ int N_size = 2;
+ int K_size = 3;
+ // (H*W x 3) grid times theta^T (3 x 2) -> (H*W x 2) source coordinates.
+ matrixMult(output_grid_data, theta_float, input_grid_data, M_size, N_size, K_size, 1);
+ for (int j = 0; j < C; ++j) {
+ for (int s = 0; s < H; ++s) {
+ for (int t = 0; t < W; ++t) {
+ int row_idx = W * s + t;
+ float px = input_grid_data[row_idx * 2 + 0];
+ float py = input_grid_data[row_idx * 2 + 1];
+
+ // src_offset selects the start of channel j's H x W plane.
+ size_t dst_offset = (j * H + s) * W + t;
+ size_t src_offset = (j * H + 0) * W + 0;
+ dst_data[dst_offset] = transform_forward_cpu(src_data + src_offset, px, py, W, H);
+ }
+ }
+ }
+}
+
static std::vector<std::string> s_CustomConfig = {
#ifdef VPU_HAS_CUSTOM_KERNELS
getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"
PRETTY_PARAM(Kernel, param_size)
PRETTY_PARAM(Strides, int)
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Group, std::string>> myriadLayersTestsShuffleChannel_nightly;
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Levels, std::string>> myriadLayersTestsQuantize_nightly;
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Levels, SwitchOut, std::string>> myriadLayersTestsQuantizeBinarize_nightly;
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dilations, Group, Kernel, Strides, std::string>> myriadLayersTestsBinaryConvolution_nightly;
-typedef myriadLayerTestBaseWithParam<std::tuple<std::vector<size_t>, std::string>>
-myriadLayersTestsExperimentalDetectronPriorGridGenerator_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Group, std::string>> myriadLayersTestsShuffleChannel_smoke;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Levels, IRVersion, std::string>> myriadLayersTestsFakeQuantize_smoke;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Levels, SwitchOut, std::string>> myriadLayersTestsQuantizeBinarize_smoke;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dilations, Group, Kernel, Strides, std::string>> myriadLayersTestsBinaryConvolution_smoke;
+typedef myriadLayerTestBaseWithParam<std::tuple<std::vector<size_t>, std::string>> myriadLayersTestsExperimentalDetectronPriorGridGenerator_smoke;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, std::array<float, 6>, std::string>> myriadLayersTestsSpatialTransform_smoke;
+
+struct CorrelateParams {
+ tensor_test_params dims;
+ int kernel_size;
+ int pad_size;
+ int max_displacement;
+ int stride1;
+ int stride2;
+};
-TEST_P(myriadLayersTestsShuffleChannel_nightly, ShuffleChannel) {
- tensor_test_params dims = std::get<0>(GetParam());
+typedef myriadLayerTestBaseWithParam<std::tuple<CorrelateParams, std::string>> myriadLayersTestsCorrelate_smoke;
+
+TEST_P(myriadLayersTestsShuffleChannel_smoke, ShuffleChannel) {
+ tensor_test_params dims = std::get<0>(GetParam());
int group = std::get<1>(GetParam());
std::string customConfig = std::get<2>(GetParam());
2
};
-TEST_P(myriadLayersTestsQuantize_nightly, Quantize) {
+// Verifies the FakeQuantize layer (IR v7 and v10) against refQuantize with
+// randomly chosen per-tensor or per-channel quantization bounds.
+TEST_P(myriadLayersTestsFakeQuantize_smoke, FakeQuantize) {
tensor_test_params dims = std::get<0>(GetParam());
int levels = std::get<1>(GetParam());
- std::string customConfig = std::get<2>(GetParam());
+ _irVersion = std::get<2>(GetParam());
+ std::string customConfig = std::get<3>(GetParam());
- if(!customConfig.empty() && !CheckMyriadX()) {
- GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
+ if (!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
}
_config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- IN_OUT_desc inpt(5);
- for (int i = 0; i < inpt.size(); ++i) {
- inpt[i].resize(4);
- inpt[i][0] = dims.n;
- inpt[i][1] = 1;
- inpt[i][2] = 1;
- inpt[i][3] = 1;
- }
- inpt[0][1] = dims.c;
- inpt[0][2] = dims.h;
- inpt[0][3] = dims.w;
- for (int i = 1; i < inpt.size(); ++i) {
- if (rand()%2 > 0) {
- inpt[i][1] = dims.c;
- }
- }
+ // Fixed seed keeps the randomly chosen broadcast sizes and quantization
+ // bounds reproducible between runs.
+ srand(42);
+
+ const auto inputFqSize = rand() % 2 ? 1 : dims.c;
+ const auto outputFqSize = rand() % 2 ? 1 : dims.c;
+
+ // Inputs: data tensor plus input low/high and output low/high bounds,
+ // each either per-tensor (size 1) or per-channel (size C).
+ const auto inputDims = IN_OUT_desc{dims.asVector(),
+ {1, inputFqSize, 1, 1},
+ {1, inputFqSize, 1, 1},
+ {1, outputFqSize, 1, 1},
+ {1, outputFqSize, 1, 1}
+ };
- SetInputTensors(inpt);
+ SetInputTensors(inputDims);
SetOutputTensor(dims);
std::map<std::string, std::string> params;
params["levels"] = std::to_string(levels);
- ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("FakeQuantize").params(params)));
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(
+ LayerInitParams("FakeQuantize").params(params),
+ NetworkInitParams()
+ .layoutPreference(vpu::LayoutPreference::ChannelMajor)
+ .lockLayout(true)));
+
+ auto inputBlobs = std::vector<Blob::Ptr>{};
+ inputBlobs.reserve(5);
+ for (const auto& inputBlob : _inputMap) {
+ inputBlobs.push_back(inputBlob.second);
+ }
- ASSERT_TRUE(Infer());
+ // Fills a (low, high) bound pair with random values in [0, 255] such that
+ // low <= high element-wise.
+ const auto generateQuantBounds = [](const Blob::Ptr& lowBlob, const Blob::Ptr& highBlob) {
+ IE_ASSERT(lowBlob->size() == highBlob->size());
+ IE_ASSERT(lowBlob->getTensorDesc().getDims() == highBlob->getTensorDesc().getDims());
+
+ const auto lowBound = lowBlob->buffer().as<ie_fp16 *>();
+ const auto highBound = highBlob->buffer().as<ie_fp16 *>();
+ for (std::size_t i = 0; i < lowBlob->size(); i++) {
+ const float val1 = rand() % 256;
+ const float val2 = 255.0f - fabs(val1);
+ lowBound[i] = PrecisionUtils::f32tof16(std::min(val1, val2));
+ highBound[i] = PrecisionUtils::f32tof16(std::max(val1, val2));
+ }
+ };
- std::vector<Blob::Ptr> inputBlobs(inpt.size());
- auto inptIter = _inputMap.begin();
- for (int i = 0; i < inpt.size(); i++) {
- inputBlobs[i] = inptIter->second;
- inptIter++;
- }
+ // inputBlobs: [0] data, [1]/[2] input low/high, [3]/[4] output low/high.
+ generateQuantBounds(inputBlobs[1], inputBlobs[2]);
+ generateQuantBounds(inputBlobs[3], inputBlobs[4]);
+
+ ASSERT_TRUE(Infer());
+ // NOTE(review): refQuantize receives blobs [0], [1], [3], [4] — the input
+ // high bound ([2]) is never passed; confirm the reference signature really
+ // expects only four blobs here.
ASSERT_NO_FATAL_FAILURE(refQuantize(inputBlobs[0],
inputBlobs[1],
inputBlobs[3],
inputBlobs[4],
_refBlob,
- levels, false));
+ levels, true));
- CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, 0.01f);
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, 1.f);
}
-TEST_P(myriadLayersTestsQuantizeBinarize_nightly, Quantize_Binarization) {
+TEST_P(myriadLayersTestsQuantizeBinarize_smoke, Quantize_Binarization) {
std::string model = R"V0G0N(
<net name="Quantize_Binarization" version="2" batch="1">
<layers>
int OH = dims.h;
int OW = dims.w;
- int input_low_size = (rand()%2>0) ? dims.c : 1;
- int input_high_size = (levels == 2) ? input_low_size : ((rand()%2>0) ? dims.c : 1);
- int output_low_size = (rand()%2>0) ? dims.c : 1;
- int output_high_size = (levels == 2) ? output_low_size : ((rand()%2>0) ? dims.c : 1);
+ int input_low_size = (rand()%2>0) ? dims.c : 1;
+ int input_high_size = (levels == 2) ? input_low_size : ((rand()%2>0) ? dims.c : 1);
+ int output_low_size = (rand()%2>0) ? dims.c : 1;
+ int output_high_size = (levels == 2) ? output_low_size : ((rand()%2>0) ? dims.c : 1);
model.replace( model.find("@IB@"), sizeof("@IB@") -1, std::to_string(IB));
model.replace( model.find("@IB@"), sizeof("@IB@") -1, std::to_string(IB));
1
};
-TEST_P(myriadLayersTestsBinaryConvolution_nightly, BinaryConvolution) {
+TEST_P(myriadLayersTestsBinaryConvolution_smoke, BinaryConvolution) {
tensor_test_params dims = std::get<0>(GetParam());
int dilations = std::get<1>(GetParam());
int group = std::get<2>(GetParam());
1, 2
};
-TEST_P(myriadLayersTestsExperimentalDetectronPriorGridGenerator_nightly,
+TEST_P(myriadLayersTestsExperimentalDetectronPriorGridGenerator_smoke,
ExperimentalDetectronPriorGridGenerator) {
// Setup parameters and configuration.
{1, 128, 30, 30}
};
+// Checks the custom Correlate kernel against refCorrelate() on random input
+// pairs drawn from the empirically observed FlowNet activation range.
+TEST_P(myriadLayersTestsCorrelate_smoke, Correlate) {
+ const auto test = std::get<0>(GetParam());
+ const auto dims = test.dims;
+ const int kernel_size = test.kernel_size;
+ const int pad_size = test.pad_size;
+ const int max_displacement = test.max_displacement;
+ const int stride1 = test.stride1;
+ const int stride2 = test.stride2;
+ const std::string customConfig = std::get<1>(GetParam());
+
+ if(!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
+ }
+ _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
+
+ // Mirror the output-shape arithmetic of refCorrelate() so the blob
+ // dimensions always satisfy the reference implementation's shape asserts.
+ const int paddedbottomwidth = dims.w + 2 * pad_size;
+ const int paddedbottomheight = dims.h + 2 * pad_size;
+
+ const int kernel_radius = kernel_size / 2; //half of the comparison window
+ const int border_size = max_displacement + kernel_radius; //size of unreachable border region (on each side)
+
+ const int neighborhood_grid_radius = max_displacement / stride2;
+ const int neighborhood_grid_width = 2 * neighborhood_grid_radius + 1;
+
+ const int top_width = (int)ceilf((float) (paddedbottomwidth - border_size * 2) / (float) stride1);
+ const int top_height = (int)ceilf((float)(paddedbottomheight - border_size * 2) / (float)stride1);
+ // One output channel per displacement in the (2r+1) x (2r+1) neighborhood
+ // grid, matching refCorrelate(). The previous (max_displacement + 1)^2
+ // formula only coincided with this for even displacements with stride2 == 2.
+ const int top_channels = neighborhood_grid_width * neighborhood_grid_width;
+
+ const auto inputTensors = IN_OUT_desc{dims.asVector(), dims.asVector()};
+ const auto outputTensors = IN_OUT_desc{{1, (uint32_t)top_channels, (uint32_t)top_height, (uint32_t)top_width}};
+
+ SetInputTensors(inputTensors);
+ SetOutputTensors(outputTensors);
+
+ std::map<std::string, std::string> params = {
+ {"top_width", std::to_string(top_width)},
+ {"top_height", std::to_string(top_height)},
+ {"width", std::to_string(dims.w)},
+ {"height", std::to_string(dims.h)},
+ {"channels", std::to_string(dims.c)},
+ {"displacement", std::to_string(max_displacement)},
+ {"pad", std::to_string(pad_size)},
+ {"neighborhood_grid_radius", std::to_string(neighborhood_grid_radius)},
+ {"neighborhood_grid_width", std::to_string(neighborhood_grid_width)},
+ {"kernel_size", std::to_string(kernel_size)},
+ {"stride", std::to_string(stride1) + "," + std::to_string(stride2)},
+ };
+
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(
+ LayerInitParams("Correlate").params(params),
+ NetworkInitParams()
+ .layoutPreference(vpu::LayoutPreference::ChannelMajor)
+ .lockLayout(true)));
+
+ std::vector<Blob::Ptr> input_blobs{};
+ input_blobs.reserve(_inputMap.size());
+ for (auto& input : _inputMap) {
+ // Fill each input with uniform random values from the range observed
+ // in FlowNet correlation inputs.
+ for (int i = 0; i < dims.c * dims.h * dims.w; i++) {
+ const float corr_min = -1.744443f;
+ const float corr_max = 11.167725f;
+ float val = (corr_min + (float) rand() / ((float) RAND_MAX / (corr_max - corr_min + 1.f) + 1.f));
+
+ auto buf = input.second->buffer().as<ie_fp16*>();
+ buf[i] = PrecisionUtils::f32tof16(val);
+ }
+
+ input_blobs.push_back(input.second);
+ }
+ // Zero both outputs so stale buffer contents cannot mask missing writes.
+ const int output_size = top_width * top_height * top_channels;
+ for (int i = 0; i < output_size; i++) {
+ _outputMap.begin()->second->buffer().as<ie_fp16*>()[i] = 0;
+ _refBlob->buffer().as<ie_fp16*>()[i] = 0;
+ }
+
+ ASSERT_TRUE(Infer());
+
+ refCorrelate(input_blobs[0], input_blobs[1], _refBlob, kernel_size, max_displacement, pad_size, stride1, stride2);
+
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, 0.1f);
+}
+
+// {dims {N, C, H, W}, kernel_size, pad_size, max_displacement, stride1, stride2}
+static const std::vector<CorrelateParams> s_CorrelateParams = {
+ { {1, 64, 48, 64}, 1, 8, 8, 1, 2 },
+ { {1, 127, 12, 64}, 3, 8, 8, 1, 2 },
+ { {1, 256, 48, 64}, 1, 20, 20, 1, 2 }
+};
+
+// Checks the custom SpatialTransform kernel against refSpatialTransform() for
+// several input shapes and affine matrices. The second network input is the
+// 2x3 theta matrix (declared as a 1x1x2x3 tensor).
+TEST_P(myriadLayersTestsSpatialTransform_smoke, SpatialTransform) {
+ const tensor_test_params dims = std::get<0>(GetParam());
+ const std::array<float, 6> theta = std::get<1>(GetParam());
+ const std::string customConfig = std::get<2>(GetParam());
+
+ if(!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
+ }
+ _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
+
+ SetInputTensors({dims.asVector(), {1, 1, 2, 3}});
+ SetOutputTensor(dims);
+
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(
+ LayerInitParams("SpatialTransform"),
+ NetworkInitParams()
+ .layoutPreference(vpu::LayoutPreference::ChannelMajor)
+ .lockLayout(true)));
+
+ // The second entry of _inputMap is the theta blob; copy the test's affine
+ // coefficients into it as fp16.
+ auto theta_half = std::next(_inputMap.begin())->second;
+ for (int i = 0; i < 6; i++) {
+ theta_half->buffer().as<ie_fp16*>()[i] = PrecisionUtils::f32tof16(theta[i]);
+ }
+
+ ASSERT_TRUE(Infer());
+
+ ASSERT_NO_FATAL_FAILURE(refSpatialTransform(_inputMap.begin()->second,
+ std::next(_inputMap.begin())->second,
+ _refBlob));
+
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, 0.001f);
+}
+
+// NCHW input shapes covering even and odd spatial sizes plus larger frames.
+static const std::vector<Dims> s_SpatialTransformInputs = {
+ {{ 1, 3, 24, 94 }},
+ {{ 1, 3, 96, 188 }},
+ {{ 1, 3, 97, 189 }},
+ {{ 1, 3, 98, 190 }},
+ {{ 1, 3, 384, 512 }},
+ {{ 1, 3, 24, 640 }},
+};
+
+// 2x3 affine matrices: a scale/shear/translation and the identity transform.
+static const std::vector<std::array<float, 6>> s_SpatialTransformTheta = {
+ {1.2f, 0.2f, -0.2f, 0.2f, 1.2f, -0.2f},
+ {1.f, 0.f, 0.f, 0.0f, 1.f, 0.f}
+};
#include "myriad_layers_deconvolution_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy_deconv_to_conv, myriadLayerDeconvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_deconv_to_conv, myriadLayerDeconvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 6, 5, 6))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 1), MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_deconv_to_conv_2, myriadLayerDeconvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_deconv_to_conv_2, myriadLayerDeconvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 2, 256, 14, 14))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 2, 2), MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_group, myriadLayerDeconvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_group, myriadLayerDeconvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 384, 4, 2))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 2, 2)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_depthDeconv, myriadLayerDeconvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_depthDeconv, myriadLayerDeconvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 384, 4, 2))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 2, 2)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerDeconvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerDeconvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 2, 37, 59)
, MAKE_STRUCT(tensor_test_params, 1, 21, 16, 16)
)
);
-INSTANTIATE_TEST_CASE_P(extra3x3s1, myriadLayerDeconvolution_nightly,
+INSTANTIATE_TEST_CASE_P(extra3x3s1, myriadLayerDeconvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 256, 1, 1))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
PRETTY_PARAM(hw_optimization, bool)
typedef myriadLayerTestBaseWithParam<tuple<DimsInput, kernel, stride, pad
- , out_channels, group, layoutPreference, hw_optimization >> myriadLayerDeconvolution_nightly;
+ , out_channels, group, layoutPreference, hw_optimization >> myriadLayerDeconvolution_smoke;
typedef myriadLayerTestBaseWithParam<tuple<DimsInput, kernel, stride, pad, pad_end
, out_channels, group, layoutPreference, hw_optimization >> myriadLayerDeconvolution_asymm_pad;
-TEST_P(myriadLayerDeconvolution_nightly, Deconvolution) {
+TEST_P(myriadLayerDeconvolution_smoke, Deconvolution) {
tensor_test_params input_dims = get<0>(GetParam());
param_size kernel = get<1>(GetParam());
param_size stride = get<2>(GetParam());
}
-class myriadDetectionOutputTests_nightly : public myriadLayersTests_nightly {
+class myriadDetectionOutputTests_smoke : public myriadLayersTests_nightly {
public:
std::vector<float> gen_locations;
std::vector<float> gen_confidence;
}
};
-TEST_F(myriadDetectionOutputTests_nightly, NoConst) {
+TEST_F(myriadDetectionOutputTests_smoke, NoConst) {
ASSERT_NO_FATAL_FAILURE(PrepareInput());
ASSERT_NO_FATAL_FAILURE(CalcRefOutput(false));
CheckResults();
}
-TEST_F(myriadDetectionOutputTests_nightly, MxNet) {
+TEST_F(myriadDetectionOutputTests_smoke, MxNet) {
ASSERT_NO_FATAL_FAILURE(PrepareInput());
ASSERT_NO_FATAL_FAILURE(CalcRefOutput(true));
CheckResults();
}
-TEST_F(myriadDetectionOutputTests_nightly, WithConst) {
+TEST_F(myriadDetectionOutputTests_smoke, WithConst) {
ASSERT_NO_FATAL_FAILURE(PrepareInput());
ASSERT_NO_FATAL_FAILURE(CalcRefOutput(false));
#include "myriad_layers_eltwise_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMax_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMax_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSum_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSum_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSub_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSub_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMul_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMul_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSumWithCoeff_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSumWithCoeff_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSumWithBroadcast_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSumWithBroadcast_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::Values<int>(4))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSubWithCoeff_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSubWithCoeff_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSubWithBroadcast_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSubWithBroadcast_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::Values<int>(4))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseDiv_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseDiv_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMin_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMin_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSqDiff_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSqDiff_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwisePow_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwisePow_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseFloorMod_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseFloorMod_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseEqual_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseEqual_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseNotEqual_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseNotEqual_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseGreater_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseGreater_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseGreaterEqual_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseGreaterEqual_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLess_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLess_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLessEqual_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLessEqual_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalNot_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalNot_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyOneInput),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalAnd_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalAnd_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalOr_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalOr_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalXor_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalXor_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMean_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMean_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
}
};
-class myriadTestsEltwiseMax_nightly: public EltwiseTest<ELTWISE_MAX>
+class myriadTestsEltwiseMax_smoke: public EltwiseTest<ELTWISE_MAX>
{
};
-class myriadTestsEltwiseSum_nightly: public EltwiseTest<ELTWISE_SUM>
+class myriadTestsEltwiseSum_smoke: public EltwiseTest<ELTWISE_SUM>
{
};
-class myriadTestsEltwiseSub_nightly: public EltwiseTest<ELTWISE_SUB>
+class myriadTestsEltwiseSub_smoke: public EltwiseTest<ELTWISE_SUB>
{
};
-class myriadTestsEltwiseMul_nightly: public EltwiseTest<ELTWISE_MUL>
+class myriadTestsEltwiseMul_smoke: public EltwiseTest<ELTWISE_MUL>
{
};
-class myriadTestsEltwiseSumWithCoeff_nightly: public EltwiseTest<ELTWISE_SUM>
+class myriadTestsEltwiseSumWithCoeff_smoke: public EltwiseTest<ELTWISE_SUM>
{
};
-class myriadTestsEltwiseSubWithCoeff_nightly: public EltwiseTest<ELTWISE_SUB>
+class myriadTestsEltwiseSubWithCoeff_smoke: public EltwiseTest<ELTWISE_SUB>
{
};
-class myriadTestsEltwiseSumWithBroadcast_nightly: public EltwiseTest<ELTWISE_SUM>
+class myriadTestsEltwiseSumWithBroadcast_smoke: public EltwiseTest<ELTWISE_SUM>
{
};
-class myriadTestsEltwiseSubWithBroadcast_nightly: public EltwiseTest<ELTWISE_SUB>
+class myriadTestsEltwiseSubWithBroadcast_smoke: public EltwiseTest<ELTWISE_SUB>
{
};
-class myriadTestsEltwiseDiv_nightly: public EltwiseTest<ELTWISE_DIV>
+class myriadTestsEltwiseDiv_smoke: public EltwiseTest<ELTWISE_DIV>
{
};
-class myriadTestsEltwiseMin_nightly: public EltwiseTest<ELTWISE_MIN>
+class myriadTestsEltwiseMin_smoke: public EltwiseTest<ELTWISE_MIN>
{
};
-class myriadTestsEltwiseSqDiff_nightly: public EltwiseTest<ELTWISE_SQDIFF>
+class myriadTestsEltwiseSqDiff_smoke: public EltwiseTest<ELTWISE_SQDIFF>
{
};
-class myriadTestsEltwisePow_nightly: public EltwiseTest<ELTWISE_POW>
+class myriadTestsEltwisePow_smoke: public EltwiseTest<ELTWISE_POW>
{
void SetUp() override {
EltwiseTest::SetUp();
}
};
-class myriadTestsEltwiseFloorMod_nightly: public EltwiseTest<ELTWISE_FLOOR_MOD>
+class myriadTestsEltwiseFloorMod_smoke: public EltwiseTest<ELTWISE_FLOOR_MOD>
{
};
-class myriadTestsEltwiseEqual_nightly: public EltwiseTest<ELTWISE_EQUAL>
+class myriadTestsEltwiseEqual_smoke: public EltwiseTest<ELTWISE_EQUAL>
{
};
-class myriadTestsEltwiseNotEqual_nightly: public EltwiseTest<ELTWISE_NOT_EQUAL>
+class myriadTestsEltwiseNotEqual_smoke: public EltwiseTest<ELTWISE_NOT_EQUAL>
{
};
-class myriadTestsEltwiseGreater_nightly: public EltwiseTest<ELTWISE_GREATER>
+class myriadTestsEltwiseGreater_smoke: public EltwiseTest<ELTWISE_GREATER>
{
};
-class myriadTestsEltwiseGreaterEqual_nightly: public EltwiseTest<ELTWISE_GREATER_EQUAL>
+class myriadTestsEltwiseGreaterEqual_smoke: public EltwiseTest<ELTWISE_GREATER_EQUAL>
{
};
-class myriadTestsEltwiseLess_nightly: public EltwiseTest<ELTWISE_LESS>
+class myriadTestsEltwiseLess_smoke: public EltwiseTest<ELTWISE_LESS>
{
};
-class myriadTestsEltwiseLessEqual_nightly: public EltwiseTest<ELTWISE_LESS_EQUAL>
+class myriadTestsEltwiseLessEqual_smoke: public EltwiseTest<ELTWISE_LESS_EQUAL>
{
};
-class myriadTestsEltwiseLogicalNot_nightly: public EltwiseTest<ELTWISE_LOGICAL_NOT>
+class myriadTestsEltwiseLogicalNot_smoke: public EltwiseTest<ELTWISE_LOGICAL_NOT>
{
void SetUp() override {
EltwiseTest::SetUp();
}
};
-class myriadTestsEltwiseLogicalAnd_nightly: public EltwiseTest<ELTWISE_LOGICAL_AND>
+class myriadTestsEltwiseLogicalAnd_smoke: public EltwiseTest<ELTWISE_LOGICAL_AND>
{
void SetUp() override {
EltwiseTest::SetUp();
}
};
-class myriadTestsEltwiseLogicalOr_nightly: public EltwiseTest<ELTWISE_LOGICAL_OR>
+class myriadTestsEltwiseLogicalOr_smoke: public EltwiseTest<ELTWISE_LOGICAL_OR>
{
void SetUp() override {
EltwiseTest::SetUp();
}
};
-class myriadTestsEltwiseLogicalXor_nightly: public EltwiseTest<ELTWISE_LOGICAL_XOR>
+class myriadTestsEltwiseLogicalXor_smoke: public EltwiseTest<ELTWISE_LOGICAL_XOR>
{
void SetUp() override {
EltwiseTest::SetUp();
}
};
-class myriadTestsEltwiseMean_nightly: public EltwiseTest<ELTWISE_MEAN>
+class myriadTestsEltwiseMean_smoke: public EltwiseTest<ELTWISE_MEAN>
{
};
-TEST_P(myriadTestsEltwiseMax_nightly, Max)
+TEST_P(myriadTestsEltwiseMax_smoke, Max)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseSum_nightly, Sum)
+TEST_P(myriadTestsEltwiseSum_smoke, Sum)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseSub_nightly, Sub)
+TEST_P(myriadTestsEltwiseSub_smoke, Sub)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseMul_nightly, Mul)
+TEST_P(myriadTestsEltwiseMul_smoke, Mul)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseSumWithCoeff_nightly, Sum)
+TEST_P(myriadTestsEltwiseSumWithCoeff_smoke, Sum)
{
InitBody(true);
}
-TEST_P(myriadTestsEltwiseSubWithCoeff_nightly, Sub)
+TEST_P(myriadTestsEltwiseSubWithCoeff_smoke, Sub)
{
InitBody(true);
}
-TEST_P(myriadTestsEltwiseSumWithBroadcast_nightly, Sum)
+TEST_P(myriadTestsEltwiseSumWithBroadcast_smoke, Sum)
{
InitBody(false, true);
}
-TEST_P(myriadTestsEltwiseSubWithBroadcast_nightly, Sub)
+TEST_P(myriadTestsEltwiseSubWithBroadcast_smoke, Sub)
{
InitBody(false, true);
}
-TEST_P(myriadTestsEltwiseDiv_nightly, Div)
+TEST_P(myriadTestsEltwiseDiv_smoke, Div)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseMin_nightly, Min)
+TEST_P(myriadTestsEltwiseMin_smoke, Min)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseSqDiff_nightly, SqDiff)
+TEST_P(myriadTestsEltwiseSqDiff_smoke, SqDiff)
{
InitBody();
}
-TEST_P(myriadTestsEltwisePow_nightly, Pow)
+TEST_P(myriadTestsEltwisePow_smoke, Pow)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseFloorMod_nightly, FloorMod)
+TEST_P(myriadTestsEltwiseFloorMod_smoke, FloorMod)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseEqual_nightly, Equal)
+TEST_P(myriadTestsEltwiseEqual_smoke, Equal)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseNotEqual_nightly, NotEqual)
+TEST_P(myriadTestsEltwiseNotEqual_smoke, NotEqual)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseGreater_nightly, Greater)
+TEST_P(myriadTestsEltwiseGreater_smoke, Greater)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseGreaterEqual_nightly, GreaterEqual)
+TEST_P(myriadTestsEltwiseGreaterEqual_smoke, GreaterEqual)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseLess_nightly, Less)
+TEST_P(myriadTestsEltwiseLess_smoke, Less)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseLessEqual_nightly, LessEqual)
+TEST_P(myriadTestsEltwiseLessEqual_smoke, LessEqual)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseLogicalNot_nightly, LogicalNot)
+TEST_P(myriadTestsEltwiseLogicalNot_smoke, LogicalNot)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseLogicalAnd_nightly, LogicalAnd)
+TEST_P(myriadTestsEltwiseLogicalAnd_smoke, LogicalAnd)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseLogicalOr_nightly, LogicalOr)
+TEST_P(myriadTestsEltwiseLogicalOr_smoke, LogicalOr)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseLogicalXor_nightly, LogicalXor)
+TEST_P(myriadTestsEltwiseLogicalXor_smoke, LogicalXor)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseMean_nightly, Mean)
+TEST_P(myriadTestsEltwiseMean_smoke, Mean)
{
InitBody();
}
#include "myriad_layers_elu_test.hpp"
-INSTANTIATE_TEST_CASE_P( accuracy, myriadLayersTestsELUParams,
+INSTANTIATE_TEST_CASE_P( accuracy, myriadLayersTestsELUParams_smoke,
::testing::Combine(
::testing::ValuesIn(s_powerTensors),
::testing::ValuesIn(s_powerParams))
}
}
-typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, alpha>> myriadLayersTestsELUParams;
+typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, alpha>> myriadLayersTestsELUParams_smoke;
-TEST_P(myriadLayersTestsELUParams, TestsELU) {
+TEST_P(myriadLayersTestsELUParams_smoke, TestsELU) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
auto param = GetParam();
#include "myriad_layers_erf_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsErf_nightly,
+ accuracy, myriadLayersTestsErf_smoke,
::testing::ValuesIn(s_ErfDims));
}
}
-class myriadLayersTestsErf_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsErf_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<SizeVector> {
public:
};
-TEST_P(myriadLayersTestsErf_nightly, TestsErf)
+TEST_P(myriadLayersTestsErf_smoke, TestsErf)
{
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
auto p = ::testing::WithParamInterface<SizeVector>::GetParam();
{{ 10.0, 10.0, 5.0, 5.0 }, 4.135166645050049, 0.5, 0.05, _MaxDetections, _NumClasses, 2000, 0 },
};
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsExpDetectionOutput_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsExpDetectionOutput_smoke,
::testing::Combine(
::testing::ValuesIn(s_sizeParams_list),
::testing::ValuesIn(s_layerParams_list))
}
};
-class myriadTestsExpDetectionOutput_nightly: public ExpDetectionOutputTest
+class myriadTestsExpDetectionOutput_smoke: public ExpDetectionOutputTest
{
};
-TEST_P(myriadTestsExpDetectionOutput_nightly, ExpDetectionOutput)
+TEST_P(myriadTestsExpDetectionOutput_smoke, ExpDetectionOutput)
{
testExpDetectionOutput();
}
#include "myriad_layers_exp_generateproposals_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExpGenerateProposals_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExpGenerateProposals_smoke,
::testing::Combine(
::testing::ValuesIn(s_ExpGenerateProposalsLayerScores),
::testing::ValuesIn(s_ExpGenerateProposalsLayerImInfo),
using ExpGenerateProposalsTestParams = std::tuple<Dims, std::vector<int>, GenerateProposalsParam>;
-typedef myriadLayerTestBaseWithParam<ExpGenerateProposalsTestParams> myriadLayersTestsExpGenerateProposals_nightly;
+typedef myriadLayerTestBaseWithParam<ExpGenerateProposalsTestParams> myriadLayersTestsExpGenerateProposals_smoke;
static void genInputs(InferenceEngine::BlobMap inputMap,
const int numProposals,
inputIMinfo[1] = PrecisionUtils::f32tof16( (float) imgW );
}
-TEST_P(myriadLayersTestsExpGenerateProposals_nightly, ExpGenerateProposals) {
+TEST_P(myriadLayersTestsExpGenerateProposals_smoke, ExpGenerateProposals) {
tensor_test_params scoresDims = std::get<0>(GetParam());
std::vector<int> im_info = std::get<1>(GetParam());
GenerateProposalsParam opParams = std::get<2>(GetParam());
#include "myriad_layers_exp_priorgridgenerator_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExpPriorGridGenerator_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExpPriorGridGenerator_smoke,
::testing::Combine(
::testing::ValuesIn(s_ExpPriorGridGeneratorLayerInputs),
::testing::ValuesIn(s_ExpPriorGridGeneratorLayerParam))
using ExpPriorGridGeneratorTestParams = std::tuple<InputDims, PriorGridGeneratorParam>;
-typedef myriadLayerTestBaseWithParam<ExpPriorGridGeneratorTestParams> myriadLayersTestsExpPriorGridGenerator_nightly;
+typedef myriadLayerTestBaseWithParam<ExpPriorGridGeneratorTestParams> myriadLayersTestsExpPriorGridGenerator_smoke;
static void genPriors(InferenceEngine::Blob::Ptr rois,
const tensor_test_params& params,
}
}
-TEST_P(myriadLayersTestsExpPriorGridGenerator_nightly, ExpPriorGridGenerator) {
+TEST_P(myriadLayersTestsExpPriorGridGenerator_smoke, ExpPriorGridGenerator) {
InputDims inputTensorsDims = std::get<0>(GetParam());
PriorGridGeneratorParam opParams = std::get<1>(GetParam());
#include "myriad_layers_exp_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsExp_nightly,
+ accuracy, myriadLayersTestsExp_smoke,
::testing::ValuesIn(s_expParams));
using namespace InferenceEngine;
-class myriadLayersTestsExp_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsExp_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<Dims> {};
-TEST_P(myriadLayersTestsExp_nightly, TestsExp)
+TEST_P(myriadLayersTestsExp_smoke, TestsExp)
{
auto p = ::testing::WithParamInterface<Dims>::GetParam();
SetInputTensor(p);
#include "myriad_layers_exp_topkrois_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExpTopKROIs_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExpTopKROIs_smoke,
::testing::Combine(
::testing::ValuesIn(s_ExpTopKROIsInputRoisNum),
::testing::ValuesIn(s_ExpTopKROIsMaxRoisNum))
using ExpTopKROIsTestParams = std::tuple<int, TopKROIsParam>;
-typedef myriadLayerTestBaseWithParam<ExpTopKROIsTestParams> myriadLayersTestsExpTopKROIs_nightly;
+typedef myriadLayerTestBaseWithParam<ExpTopKROIsTestParams> myriadLayersTestsExpTopKROIs_smoke;
static void genInputs(InferenceEngine::BlobMap inputMap) {
const std::string INPUT_ROIS = "input0";
}
}
-TEST_P(myriadLayersTestsExpTopKROIs_nightly, ExpTopKROIs) {
+TEST_P(myriadLayersTestsExpTopKROIs_smoke, ExpTopKROIs) {
int inputRoisNum = std::get<0>(GetParam());
TopKROIsParam opParams = std::get<1>(GetParam());
#include "myriad_layers_flatten_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsFlatten_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsFlatten_smoke,
::testing::Combine(
::testing::ValuesIn(s_flattenTensors),
::testing::ValuesIn(s_flattenAxis)
#include "myriad_layers_tests.hpp"
-typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, int32_t>> myriadLayersTestsFlatten_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, int32_t>> myriadLayersTestsFlatten_smoke;
static void ref_flatten(const InferenceEngine::Blob::Ptr src,
InferenceEngine::Blob::Ptr dst) {
}
}
-TEST_P(myriadLayersTestsFlatten_nightly, Flatten) {
+TEST_P(myriadLayersTestsFlatten_smoke, Flatten) {
auto input = std::get<0>(GetParam());
int32_t axis_val = std::get<1>(GetParam());
IN_OUT_desc input_tensor;
#include "myriad_layers_floor_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsFloor_nightly,
+ accuracy, myriadLayersTestsFloor_smoke,
::testing::ValuesIn(s_FloorParams));
}
}
-class myriadLayersTestsFloor_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsFloor_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<Dims> {
public:
};
-TEST_P(myriadLayersTestsFloor_nightly, TestsFloor)
+TEST_P(myriadLayersTestsFloor_smoke, TestsFloor)
{
auto p = ::testing::WithParamInterface<Dims>::GetParam();
SetInputTensor(p);
#include "myriad_layers_fully_connected_tests.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsFullyConnected_nightly,
+ accuracy, myriadLayersTestsFullyConnected_smoke,
::testing::ValuesIn(s_fcTestParams)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsFullyConnectedBatch_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsFullyConnectedBatch_smoke,
::testing::Combine(
::testing::ValuesIn(s_fcTestBatchParams)
, ::testing::ValuesIn(s_fcTestBatchOutSizes)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsFullyConnectedPVA_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsFullyConnectedPVA_smoke,
::testing::Combine(
::testing::ValuesIn(s_fcTestPVAParams)
, ::testing::ValuesIn(s_fcTestPVAOutSizes)
using namespace InferenceEngine;
-class myriadLayersTestsFullyConnected_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsFullyConnected_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<fcon_test_params> {
};
typedef std::tuple<InferenceEngine::SizeVector, uint32_t> IR3_FC_params;
-class myriadLayersTestsFullyConnectedBatch_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsFullyConnectedBatch_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<IR3_FC_params> {
};
-TEST_P(myriadLayersTestsFullyConnected_nightly, TestsFullyConnected)
+TEST_P(myriadLayersTestsFullyConnected_smoke, TestsFullyConnected)
{
fcon_test_params p = ::testing::WithParamInterface<fcon_test_params>::GetParam();
}
-TEST_P(myriadLayersTestsFullyConnectedBatch_nightly, TestsFullyConnected)
+TEST_P(myriadLayersTestsFullyConnectedBatch_smoke, TestsFullyConnected)
{
auto p = ::testing::WithParamInterface<IR3_FC_params>::GetParam();
auto input_tensor = std::get<0>(p);
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), 0.02);
}
-class myriadLayersTestsFullyConnectedPVA_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsFullyConnectedPVA_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<IR3_FC_params> {
};
-TEST_P(myriadLayersTestsFullyConnectedPVA_nightly, TestsFullyConnected)
+TEST_P(myriadLayersTestsFullyConnectedPVA_smoke, TestsFullyConnected)
{
auto p = ::testing::WithParamInterface<IR3_FC_params>::GetParam();
auto input_tensor = std::get<0>(p);
using namespace testing;
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerGather_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerGather_smoke,
Values(GatherTestParams { {36549, 1024}, {16}, 0, "FP16" },
GatherTestParams { {10}, {10}, 0, "FP16" },
GatherTestParams { {36549, 1024}, {10}, 0, "FP16" },
Axis,
Type>;
-class myriadLayerGather_nightly :
+class myriadLayerGather_smoke :
public myriadLayerTestBaseWithParam<GatherTestParams> {
protected:
}
};
-TEST_P(myriadLayerGather_nightly, Gather) {
+TEST_P(myriadLayerGather_smoke, Gather) {
testGather();
}
#include "myriad_layers_gemm_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerGEMM,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerGEMM_smoke,
::testing::Combine(
::testing::Values<gemm_parameters>(
MAKE_STRUCT(gemm_parameters, 4.7f, 2.3f, 5, 7, 11, 1, 2, 3, 4, 5, 6, 7, 8),
}
}
-typedef myriadLayerTestBaseWithParam<tuple<gemm_parameters, layoutPreference, hasThreeInputs, transposeA, transposeB>> myriadLayerGEMM;
+typedef myriadLayerTestBaseWithParam<tuple<gemm_parameters, layoutPreference, hasThreeInputs, transposeA, transposeB>> myriadLayerGEMM_smoke;
-TEST_P(myriadLayerGEMM, GEMM) {
+TEST_P(myriadLayerGEMM_smoke, GEMM) {
gemm_parameters gemm_parameter = get<0>(GetParam());
auto layoutPreference = get<1>(GetParam());
auto hasThreeInputs = get<2>(GetParam());
#include "myriad_layers_grn_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsGRN_nightly,
- ::testing::Combine(
- ::testing::ValuesIn(s_GRNTensors),
- ::testing::ValuesIn(s_GRN_bias),
- ::testing::ValuesIn(s_MVNCustomConfig)));
-
-
-TEST_F(myriadLayersTests_nightly, GRN_CHW_Input)
-{
- std::string model = R"V0G0N(
- <net name="GRN" version="2" batch="1">
- <layers>
- <layer name="data" type="Input" precision="FP16" id="1">
- <output>
- <port id="1">
- <dim>1</dim>
- <dim>24</dim>
- <dim>128</dim>
- <dim>224</dim>
- </port>
- </output>
- </layer>
- <layer name="grn" type="GRN" precision="FP16" id="2">
- <data bias="0.5"/>
- <input>
- <port id="2">
- <dim>1</dim>
- <dim>24</dim>
- <dim>128</dim>
- <dim>224</dim>
- </port>
- </input>
- <output>
- <port id="3">
- <dim>1</dim>
- <dim>24</dim>
- <dim>128</dim>
- <dim>224</dim>
- </port>
- </output>
- </layer>
- </layers>
- <edges>
- <edge from-layer="1" from-port="1" to-layer="2" to-port="2"/>
- </edges>
- </net>
- )V0G0N";
-
- StatusCode st;
-
- ASSERT_NO_THROW(readNetwork(model));
-
- const auto& network = _cnnNetwork;
-
- _inputsInfo = network.getInputsInfo();
- _inputsInfo["data"]->setPrecision(Precision::FP16);
-
- _outputsInfo = network.getOutputsInfo();
- _outputsInfo["grn"]->setPrecision(Precision::FP16);
-
- ASSERT_NO_THROW(st = _vpuPluginPtr->LoadNetwork(_exeNetwork, network, {}, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
- ASSERT_NE(_exeNetwork, nullptr) << _resp.msg;
-
- ASSERT_NO_THROW(st = _exeNetwork->CreateInferRequest(_inferRequest, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
-
- auto tensorDesc = TensorDesc(Precision::FP16, _inputsInfo["data"]->getTensorDesc().getDims(), Layout::NCHW);
-
- auto inputNCHW = make_shared_blob<ie_fp16>(tensorDesc);
- ASSERT_NO_THROW(inputNCHW->allocate());
-
- auto outputNCHW = make_shared_blob<ie_fp16>(tensorDesc);
- ASSERT_NO_THROW(outputNCHW->allocate());
-
- auto output_ref = make_shared_blob<ie_fp16>(tensorDesc);
- ASSERT_NO_THROW(output_ref->allocate());
-
- ASSERT_NO_THROW(GenRandomData(inputNCHW));
-
- ASSERT_NO_THROW(st = _inferRequest->SetBlob("data", inputNCHW, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
-
- ASSERT_NO_THROW(st = _inferRequest->SetBlob("grn", outputNCHW, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
-
- ASSERT_NO_THROW(st = _inferRequest->Infer(&_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
-
- ASSERT_NO_FATAL_FAILURE(refGRN(inputNCHW, output_ref, 0.5f, true));
-
- CompareCommonAbsolute(outputNCHW, output_ref, 0.003);
-}
+INSTANTIATE_TEST_CASE_P(
+ accuracy, myriadLayersTestsGRN_smoke,
+ ::testing::Combine(
+ ::testing::ValuesIn(s_GRNInputs),
+ ::testing::Values<Bias>(0.5f, 10.f, 1.f),
+ ::testing::Values<IRVersion>(IRVersion::v7, IRVersion::v10),
+ ::testing::ValuesIn(s_CustomConfig)
+));
}
PRETTY_PARAM(Bias, float)
+PRETTY_PARAM(CustomConfig, std::string)
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Bias, std::string>> myriadLayersTestsGRN_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, Bias, IRVersion, CustomConfig>> myriadLayersTestsGRN_smoke;
-TEST_P(myriadLayersTestsGRN_nightly, GRN) {
- tensor_test_params dims = std::get<0>(GetParam());
- float bias = std::get<1>(GetParam());
- std::string customConfig = std::get<2>(GetParam());
+TEST_P(myriadLayersTestsGRN_smoke, GRN) {
+ const SizeVector dims = std::get<0>(GetParam());
+ const float bias = std::get<1>(GetParam());
+ _irVersion = std::get<2>(GetParam());
+ const std::string customConfig = std::get<3>(GetParam());
- if(!customConfig.empty() && !CheckMyriadX()) {
- GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
+ if (!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
}
_config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- SetInputTensor(dims);
- SetOutputTensor(dims);
+ SetInputTensors({dims});
+ SetOutputTensors({dims});
std::map<std::string, std::string> params;
params["bias"] = std::to_string(bias);
- ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("GRN").params(params)));
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("GRN").params(params),
+ NetworkInitParams()
+ .layoutPreference(vpu::LayoutPreference::ChannelMajor)
+ .lockLayout(true)));
ASSERT_TRUE(Infer());
- ASSERT_NO_FATAL_FAILURE(refGRN(_inputMap.begin()->second, _refBlob, bias, false));
+ ASSERT_NO_FATAL_FAILURE(refGRN(_inputMap.begin()->second, _refBlob, bias, true));
CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, ERROR_BOUND);
}
-static std::vector<Dims> s_GRNTensors = {
- {{1, 3, 16, 224}},
- {{1, 24, 128, 224}},
-};
-
-static std::vector<Bias> s_GRN_bias = {
- 0.5f, 10.f
-};
-
-static std::vector<std::string> s_MVNCustomConfig = {
- "" ,
+static std::vector<CustomConfig> s_CustomConfig = {
+ {""} ,
#ifdef VPU_HAS_CUSTOM_KERNELS
getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"
#endif
};
+
+static std::vector<SizeVector> s_GRNInputs = {
+ {1, 3, 16, 224},
+ {1, 24, 128, 224},
+};
\ No newline at end of file
#include "myriad_layers_interp_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsInterp_nightly,
+ accuracy, myriadLayersTestsInterp_smoke,
::testing::Combine(
::testing::Values<SizeInputOutput>(
PRETTY_PARAM(SizeInputOutput, interp_test_params)
PRETTY_PARAM(align_corners, bool)
-typedef myriadLayerTestBaseWithParam<tuple<interp_test_params, layoutPreference, align_corners>> myriadLayersTestsInterp_nightly;
+typedef myriadLayerTestBaseWithParam<tuple<interp_test_params, layoutPreference, align_corners>> myriadLayersTestsInterp_smoke;
void ref_interp(const Blob::Ptr src,
}
}
-TEST_P(myriadLayersTestsInterp_nightly, Interp)
+TEST_P(myriadLayersTestsInterp_smoke, Interp)
{
interp_test_params test_params = get<0>(GetParam());
auto layoutPreference = get<1>(GetParam());
#include "myriad_layers_log_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsLog_nightly,
+ accuracy, myriadLayersTestsLog_smoke,
::testing::ValuesIn(s_logParams));
using namespace InferenceEngine;
-class myriadLayersTestsLog_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsLog_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<Dims> {
public:
};
-TEST_P(myriadLayersTestsLog_nightly, TestsLog)
+TEST_P(myriadLayersTestsLog_smoke, TestsLog)
{
auto p = ::testing::WithParamInterface<Dims>::GetParam();
SetInputTensor(p);
#include "myriad_layers_lrn_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsLRN_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsLRN_smoke,
::testing::Combine(
::testing::ValuesIn(s_LRNTensors),
::testing::ValuesIn(s_LRNlocal_size),
PRETTY_PARAM(alpha, float)
PRETTY_PARAM(beta, float)
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, local_size, k_val, alpha, beta>> myriadLayersTestsLRN_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, local_size, k_val, alpha, beta>> myriadLayersTestsLRN_smoke;
-TEST_P(myriadLayersTestsLRN_nightly, LRN) {
+TEST_P(myriadLayersTestsLRN_smoke, LRN) {
tensor_test_params dims = std::get<0>(GetParam());
uint32_t local_v = std::get<1>(GetParam());
float k = std::get<2>(GetParam());
CompareCommonAbsolute(dst, _refBlob, ERROR_BOUND);
}
-TEST_P(myriadLayersTestsLRN_nightly, InnerLRN) {
+TEST_P(myriadLayersTestsLRN_smoke, InnerLRN) {
tensor_test_params dims = std::get<0>(GetParam());
uint32_t local_v = std::get<1>(GetParam());
float k = std::get<2>(GetParam());
CompareCommonAbsolute(output, refOut0, ERROR_BOUND);
}
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsLSTMCell_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsLSTMCell_smoke,
::testing::Values<lstmcell_test_params>(MAKE_STRUCT(lstmcell_test_params, 512, 128)),
);
<< ", state size = " << tst.state_size;
};
};
-typedef myriadLayerTestBaseWithParam<lstmcell_test_params> myriadLayersTestsLSTMCell_nightly;
+typedef myriadLayerTestBaseWithParam<lstmcell_test_params> myriadLayersTestsLSTMCell_smoke;
#define f32Tof16 PrecisionUtils::f32tof16
#define f16Tof32 PrecisionUtils::f16tof32
}
}
-TEST_P(myriadLayersTestsLSTMCell_nightly, LSTMCell) {
+TEST_P(myriadLayersTestsLSTMCell_smoke, LSTMCell) {
auto param = GetParam();
lstmcell_test_params test_params = param;
#include "myriad_layers_mvn_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMVN_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMVN_smoke,
::testing::Combine(
::testing::ValuesIn(s_MVNTensors),
::testing::ValuesIn(s_MVN_acrossChannels),
::testing::ValuesIn(s_MVN_normalize),
::testing::ValuesIn(s_MVN_epsilon),
+ ::testing::Values(IRVersion::v7, IRVersion::v10),
::testing::ValuesIn(s_MVNCustomConfig)));
TEST_F(myriadLayersTests_nightly, MVN_CHW_Input)
PRETTY_PARAM(Normalize, int)
PRETTY_PARAM(Epsilon, float)
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, AcrossChannels, Normalize, Epsilon, std::string>> myriadLayersTestsMVN_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, AcrossChannels, Normalize, Epsilon, IRVersion, std::string>> myriadLayersTestsMVN_smoke;
-TEST_P(myriadLayersTestsMVN_nightly, MVN)
+TEST_P(myriadLayersTestsMVN_smoke, MVN)
{
tensor_test_params dims = std::get<0>(GetParam());
int acrossChannels = std::get<1>(GetParam());
int normalize = std::get<2>(GetParam());
float eps = std::get<3>(GetParam());
- std::string customConfig = std::get<4>(GetParam());
+ _irVersion = std::get<4>(GetParam());
+ std::string customConfig = std::get<5>(GetParam());
if(!customConfig.empty() && !CheckMyriadX()) {
GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
#include "myriad_layers_nms_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsNonMaxSuppression_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsNonMaxSuppression_smoke,
::testing::Values(
MAKE_STRUCT(NMS_testParams,
{6, 1, 1}, // {spatial_dimension, num_classes, num_batches}
}
}
-typedef myriadLayerTestBaseWithParam<NMS_testParams> myriadLayersTestsNonMaxSuppression_nightly;
+typedef myriadLayerTestBaseWithParam<NMS_testParams> myriadLayersTestsNonMaxSuppression_smoke;
-TEST_P(myriadLayersTestsNonMaxSuppression_nightly, NonMaxSuppression) {
+TEST_P(myriadLayersTestsNonMaxSuppression_smoke, NonMaxSuppression) {
const auto params = GetParam();
const int spatDim = params.dims[0];
const int numClasses = params.dims[1];
#include "myriad_layers_nonzero_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerTestNonZero_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerTestNonZero_smoke,
::testing::ValuesIn(inputDims));
using namespace InferenceEngine;
-class myriadLayerTestNonZero_nightly: public myriadLayersTests_nightly,
+class myriadLayerTestNonZero_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<SizeVector> {
public:
void testNonZero(vpu::LayoutPreference preference, Precision precision);
}
};
-void myriadLayerTestNonZero_nightly::testNonZero(vpu::LayoutPreference preference, Precision precision) {
+void myriadLayerTestNonZero_smoke::testNonZero(vpu::LayoutPreference preference, Precision precision) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
const auto& inputDims = GetParam();
CompareNonZero(outputIndicesBlob, refIndicesBlob, outputDimsBlob, refDimsBlob);
}
-TEST_P(myriadLayerTestNonZero_nightly, NonZero) {
+TEST_P(myriadLayerTestNonZero_smoke, NonZero) {
testNonZero(vpu::LayoutPreference::ChannelMajor, Precision::FP16);
}
-TEST_P(myriadLayerTestNonZero_nightly, NonZeroNHWC) {
+TEST_P(myriadLayerTestNonZero_smoke, NonZeroNHWC) {
testNonZero(vpu::LayoutPreference::ChannelMinor, Precision::FP16);
}
-TEST_P(myriadLayerTestNonZero_nightly, NonZeroI32) {
+TEST_P(myriadLayerTestNonZero_smoke, NonZeroI32) {
testNonZero(vpu::LayoutPreference::ChannelMajor, Precision::I32);
}
-TEST_P(myriadLayerTestNonZero_nightly, NonZeroU8) {
+TEST_P(myriadLayerTestNonZero_smoke, NonZeroU8) {
testNonZero(vpu::LayoutPreference::ChannelMajor, Precision::U8);
}
#include "myriad_layers_normalize_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsNormalize_nightly, ::testing::Combine(
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsNormalize_smoke, ::testing::Combine(
::testing::Values<Dims>(
// small size, num_channels is not divisible by 8
MAKE_STRUCT(tensor_test_params, 1, 33, 1, 1),
));
-INSTANTIATE_TEST_CASE_P(accuracy_more, myriadLayersTestsNormalize_nightly, ::testing::Combine(
+INSTANTIATE_TEST_CASE_P(accuracy_more, myriadLayersTestsNormalize_smoke, ::testing::Combine(
::testing::Values<Dims>(
//more tests
MAKE_STRUCT(tensor_test_params, 1, 1, 38, 38),
PRETTY_PARAM(ChannelSharedNormalize, bool)
PRETTY_PARAM(EPS, float)
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, AcrossSpatial, ChannelSharedNormalize, EPS>> myriadLayersTestsNormalize_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, AcrossSpatial, ChannelSharedNormalize, EPS>> myriadLayersTestsNormalize_smoke;
-TEST_P(myriadLayersTestsNormalize_nightly, Normalize) {
+TEST_P(myriadLayersTestsNormalize_smoke, Normalize) {
tensor_test_params dims = std::get<0>(GetParam());
int across_spatial = std::get<1>(GetParam());
int channel_shared = std::get<2>(GetParam());
#include "myriad_layers_oneHot_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerTestOneHot_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerTestOneHot_smoke,
::testing::Values<oneHot_test_params>(
MAKE_STRUCT(OneHotParams, {64}, 2, {0}, {}, {}),
MAKE_STRUCT(OneHotParams, {64}, 2, {-1}, {}, {}),
MAKE_STRUCT(OneHotParams, {4, 8, 16, 32, 64}, 2, {-1}, {}, {})
));
-INSTANTIATE_TEST_CASE_P(accuracy_add, myriadLayerTestOneHot_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_add, myriadLayerTestOneHot_smoke,
::testing::Values<oneHot_test_params>(
MAKE_STRUCT(OneHotParams, {16, 32, 64}, 2, {2}, {}, {}),
MAKE_STRUCT(OneHotParams, {8, 16, 32,64}, 2, {2}, {}, {}),
}
}
-typedef myriadLayerTestBaseWithParam<oneHot_test_params> myriadLayerTestOneHot_nightly;
+typedef myriadLayerTestBaseWithParam<oneHot_test_params> myriadLayerTestOneHot_smoke;
-TEST_P(myriadLayerTestOneHot_nightly, OneHot) {
+TEST_P(myriadLayerTestOneHot_smoke, OneHot) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
OneHotParams testParams = GetParam();
#include "myriad_layers_pad_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerPad,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerPad_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 64, 16, 16)),
::testing::Values<pad_parameters>(MAKE_STRUCT(pad_parameters, 0, 32, 1, 2, 0, 32, 3, 4)),
PRETTY_PARAM(layoutPreference, vpu::LayoutPreference);
PRETTY_PARAM(pad_mode, std::string);
-typedef myriadLayerTestBaseWithParam<std::tuple<DimsInput, pad_parameters, layoutPreference, pad_mode, IRVersion>> myriadLayerPad;
+typedef myriadLayerTestBaseWithParam<std::tuple<DimsInput, pad_parameters, layoutPreference, pad_mode, IRVersion>> myriadLayerPad_smoke;
const float pad_value = 42.0f;
}
}
-TEST_P(myriadLayerPad, Pad) {
+TEST_P(myriadLayerPad_smoke, Pad) {
tensor_test_params input_dims = get<0>(GetParam());
pad_parameters pad_parameter = get<1>(GetParam());
auto layoutPreference = get<2>(GetParam());
#include "myriad_layers_permute_nd_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy_2D, myriadLayersPermuteNDTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2D, myriadLayersPermuteNDTests_smoke,
::testing::Combine(
::testing::ValuesIn(s_inTensors_2D)
, ::testing::ValuesIn(s_permuteTensors_2D)
, ::testing::ValuesIn(s_permutePrecisions)
));
-INSTANTIATE_TEST_CASE_P(accuracy_3D, myriadLayersPermuteNDTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3D, myriadLayersPermuteNDTests_smoke,
::testing::Combine(
::testing::ValuesIn(s_inTensors_3D)
, ::testing::ValuesIn(s_permuteTensors_3D)
, ::testing::ValuesIn(s_permutePrecisions)
));
-INSTANTIATE_TEST_CASE_P(accuracy_4D, myriadLayersPermuteNDTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_4D, myriadLayersPermuteNDTests_smoke,
::testing::Combine(
::testing::ValuesIn(s_inTensors_4D)
, ::testing::ValuesIn(s_permuteTensors_4D)
, ::testing::ValuesIn(s_permutePrecisions)
));
-INSTANTIATE_TEST_CASE_P(accuracy_5D, myriadLayersPermuteNDTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_5D, myriadLayersPermuteNDTests_smoke,
::testing::Combine(
::testing::ValuesIn(s_inTensors_5D)
, ::testing::ValuesIn(s_permuteTensors_5D)
, ::testing::ValuesIn(s_permutePrecisions)
));
-INSTANTIATE_TEST_CASE_P(fc_to_conv_case, myriadLayersPermuteNDTests_nightly,
+INSTANTIATE_TEST_CASE_P(fc_to_conv_case, myriadLayersPermuteNDTests_smoke,
::testing::Values(
std::make_tuple(
SizeVector{8, 50, 256, 7, 7},
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_FasterRCNN, myriadLayersPermuteNDTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_FasterRCNN, myriadLayersPermuteNDTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 24, 14, 14})
,::testing::Values<InferenceEngine::SizeVector>({0, 2, 3, 1})
,::testing::ValuesIn(s_permutePrecisions)
));
-INSTANTIATE_TEST_CASE_P(accuracy_MaskRCNN, myriadLayersPermuteNDTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_MaskRCNN, myriadLayersPermuteNDTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({4, 3, 1, 88, 120})
,::testing::Values<InferenceEngine::SizeVector>({0, 3, 4, 1, 2})
IRVersion,
InferenceEngine::Precision>;
-class myriadLayersPermuteNDTests_nightly:
+class myriadLayersPermuteNDTests_smoke:
public myriadLayersTests_nightly,
public testing::WithParamInterface<PermuteNDParams> {
};
-TEST_P(myriadLayersPermuteNDTests_nightly, Permute) {
+TEST_P(myriadLayersPermuteNDTests_smoke, Permute) {
const auto& testParams = GetParam();
const auto& inputTensorSizes = std::get<0>(testParams);
const auto& permutationVector = std::get<1>(testParams);
#include "myriad_layers_permute_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersPermuteTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersPermuteTests_smoke,
::testing::Combine(
::testing::ValuesIn(s_inTensors)
, ::testing::ValuesIn(s_permuteTensors)
));
-INSTANTIATE_TEST_CASE_P(accuracyFasterRCNN, myriadLayersPermuteTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracyFasterRCNN, myriadLayersPermuteTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 24, 14, 14})
,::testing::Values<InferenceEngine::SizeVector>({0, 2, 3, 1})
}
typedef std::tuple<InferenceEngine::SizeVector, InferenceEngine::SizeVector> PermuteParams;
-class myriadLayersPermuteTests_nightly: public myriadLayersTests_nightly, /*input tensor, order */
+class myriadLayersPermuteTests_smoke: public myriadLayersTests_nightly, /*input tensor, order */
public testing::WithParamInterface<PermuteParams> {
};
}
}
-TEST_P(myriadLayersPermuteTests_nightly, Permute) {
+TEST_P(myriadLayersPermuteTests_smoke, Permute) {
std::map<std::string, std::string> params;
InferenceEngine::SizeVector output_tensor;
int32_t IW = 0;
//======================================================================
INSTANTIATE_TEST_CASE_P(tricky_ncdhw_avg_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 19, 65, 47}),
Values(KernelShape {1, 3, 5}),
);
INSTANTIATE_TEST_CASE_P(tricky_ncdhw_max_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 19, 65, 47}),
Values(KernelShape {1, 3, 5}),
);
INSTANTIATE_TEST_CASE_P(tricky_ncdhw_avg_autopad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 19, 65, 47}),
Values(KernelShape {1, 3, 5}),
);
INSTANTIATE_TEST_CASE_P(tricky_ncdhw_max_autopad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 19, 65, 47}),
Values(KernelShape {1, 3, 5}),
//======================================================================
INSTANTIATE_TEST_CASE_P(simple_ncdhw_avg_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 20, 64, 48}),
Values(KernelShape {3, 3, 3}),
);
INSTANTIATE_TEST_CASE_P(simple_ncdhw_max_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 20, 64, 48}),
Values(KernelShape {3, 3, 3}),
//----------------------------------------------------------------------
INSTANTIATE_TEST_CASE_P(simple_ncdhw_avg_autopad_1,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 20, 64, 48}),
Values(KernelShape {3, 3, 3}),
);
INSTANTIATE_TEST_CASE_P(simple_ncdhw_avg_autopad_2,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 20, 64, 48}),
Values(KernelShape {3, 3, 3}),
//----------------------------------------------------------------------
INSTANTIATE_TEST_CASE_P(simple_ncdhw_max_autopad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 20, 64, 48}),
Values(KernelShape {3, 3, 3}),
//======================================================================
INSTANTIATE_TEST_CASE_P(tricky_nchw_avg_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 65, 47}),
Values(KernelShape {1, 5}),
);
INSTANTIATE_TEST_CASE_P(tricky_nchw_max_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 65, 47}),
Values(KernelShape {1, 5}),
);
INSTANTIATE_TEST_CASE_P(tricky_nchw_avg_autopad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 65, 47}),
Values(KernelShape {1, 5}),
);
INSTANTIATE_TEST_CASE_P(tricky_nchw_max_autopad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 65, 47}),
Values(KernelShape {1, 5}),
//======================================================================
INSTANTIATE_TEST_CASE_P(simple_nchw_avg_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 64, 48}),
Values(KernelShape {3, 3}),
);
INSTANTIATE_TEST_CASE_P(simple_nchw_max_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 64, 48}),
Values(KernelShape {3, 3}),
//----------------------------------------------------------------------
INSTANTIATE_TEST_CASE_P(simple_nchw_avg_autopad_1,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 64, 48}),
Values(KernelShape {3, 3}),
);
INSTANTIATE_TEST_CASE_P(simple_nchw_avg_autopad_2,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 64, 48}),
Values(KernelShape {3, 3}),
//----------------------------------------------------------------------
INSTANTIATE_TEST_CASE_P(simple_nchw_max_autopad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 64, 48}),
Values(KernelShape {3, 3}),
//======================================================================
INSTANTIATE_TEST_CASE_P(i3d_id10,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 64, 40, 112, 112}),
Values(KernelShape {1, 3, 3}),
Values(ExcludePad(true))));
INSTANTIATE_TEST_CASE_P(i3d_id47,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 192, 40, 28, 28}),
Values(KernelShape {3, 3, 3}),
Values(ExcludePad(true))));
INSTANTIATE_TEST_CASE_P(i3d_id247,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 832, 20, 14, 14}),
Values(KernelShape {2, 2, 2}),
Values(ExcludePad(true))));
INSTANTIATE_TEST_CASE_P(i3d_id312,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 1024, 10, 7, 7}),
Values(KernelShape {2, 7, 7}),
}
};
-class myriadLayersPoolNDTest_nightly: public PoolNDTest {};
+class myriadLayersPoolNDTest_smoke: public PoolNDTest {};
-TEST_P(myriadLayersPoolNDTest_nightly, PoolND) {
+TEST_P(myriadLayersPoolNDTest_smoke, PoolND) {
testPoolND();
}
#include "myriad_layers_pooling_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 192, 56, 56})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 3, 3)) /* kernel */
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_1, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_1, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 576, 14, 14})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
);
-INSTANTIATE_TEST_CASE_P(accuracy_4X4, myriadLayers_IR3_PoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_4X4, myriadLayers_IR3_PoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 1024, 4, 4})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 4, 4)) /* kernel */
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_1X1, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_1X1, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 3, 5, 7})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 1, 1)) /* kernel */
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p0000, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p0000, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p0001, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p0001, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p0011, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p0011, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p0111, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p0111, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p1111, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p1111, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p1110, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p1110, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p1100, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p1100, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p1000, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p1000, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p1101, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p1101, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p1011, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p1011, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMax_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMax_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsFull),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxOverlappedByKernel_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxOverlappedByKernel_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 1024, 6, 6}),
::testing::Values<param_size>(MAKE_STRUCT(param_size, 7, 7)),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPad4_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPad4_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInputPad4),
::testing::ValuesIn(g_poolingKernelPad4),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPad4_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPad4_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInputPad4),
::testing::ValuesIn(g_poolingKernelPad4),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsGlobalMax_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsGlobalMax_smoke,
::testing::ValuesIn(g_GlobalPoolingInput ));
-INSTANTIATE_TEST_CASE_P(accuracy_3x3, myriadLayersTestsMax_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3x3, myriadLayersTestsMax_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(s_poolingLayerParams_k3x3),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvg_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvg_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsFull),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgOverlappedByKernel_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgOverlappedByKernel_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 1024, 6, 6}),
::testing::Values<param_size>(MAKE_STRUCT(param_size, 7, 7)),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy_3x3, myriadLayersTestsAvg_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3x3, myriadLayersTestsAvg_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(s_poolingLayerParams_k3x3),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsGlobalAvg_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsGlobalAvg_smoke,
::testing::ValuesIn(g_GlobalPoolingInput));
extern const char POOLING_AVG[] = "avg";
-class myriadLayersTestsMax_nightly: public PoolingTest<POOLING_MAX>
+class myriadLayersTestsMax_smoke: public PoolingTest<POOLING_MAX>
{
};
-class myriadLayersTestsMaxOverlappedByKernel_nightly: public PoolingTestPad4<POOLING_MAX, true>
+class myriadLayersTestsMaxOverlappedByKernel_smoke: public PoolingTestPad4<POOLING_MAX, true>
{
};
-class myriadLayersTestsMaxPad4_nightly: public PoolingTestPad4<POOLING_MAX>
+class myriadLayersTestsMaxPad4_smoke: public PoolingTestPad4<POOLING_MAX>
{
};
-class myriadLayersTestsGlobalMax_nightly: public GlobalPoolingTest<POOLING_MAX>
+class myriadLayersTestsGlobalMax_smoke: public GlobalPoolingTest<POOLING_MAX>
{
};
-class myriadLayersTestsAvg_nightly: public PoolingTest<POOLING_AVG>
+class myriadLayersTestsAvg_smoke: public PoolingTest<POOLING_AVG>
{
};
-class myriadLayersTestsAvgOverlappedByKernel_nightly: public PoolingTestPad4<POOLING_AVG, true>
+class myriadLayersTestsAvgOverlappedByKernel_smoke: public PoolingTestPad4<POOLING_AVG, true>
{
};
-class myriadLayersTestsAvgPad4_nightly: public PoolingTestPad4<POOLING_AVG>
+class myriadLayersTestsAvgPad4_smoke: public PoolingTestPad4<POOLING_AVG>
{
};
-class myriadLayersTestsGlobalAvg_nightly: public GlobalPoolingTest<POOLING_AVG>
+class myriadLayersTestsGlobalAvg_smoke: public GlobalPoolingTest<POOLING_AVG>
{
};
/* input tensor, kernel, stride, pads_begin, pads_end, auto_pad, exclude_pad method */
typedef std::tuple<InferenceEngine::SizeVector, param_size, param_size, param_size, param_size, const char*, const char*, const char*> IR3_PoolParams;
-class myriadLayers_IR3_PoolingTests_nightly: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
+class myriadLayers_IR3_PoolingTests_smoke: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
public testing::WithParamInterface<IR3_PoolParams> {
};
}
-TEST_P(myriadLayers_IR3_PoolingTests_nightly, Pooling) {
+TEST_P(myriadLayers_IR3_PoolingTests_smoke, Pooling) {
std::map<std::string, std::string> params;
InferenceEngine::SizeVector output_tensor;
int32_t IW = 0;
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), maxerr);
}
-class myriadLayers_IR3_BatchPoolingTests_nightly: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
+class myriadLayers_IR3_BatchPoolingTests_smoke: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
public testing::WithParamInterface<IR3_PoolParams> {
};
-TEST_P(myriadLayers_IR3_BatchPoolingTests_nightly, Pooling) {
+TEST_P(myriadLayers_IR3_BatchPoolingTests_smoke, Pooling) {
std::map<std::string, std::string> params;
InferenceEngine::SizeVector output_tensor;
int32_t IW = 0;
"max"
};
-TEST_P(myriadLayersTestsMax_nightly, MaxPooling)
+TEST_P(myriadLayersTestsMax_smoke, MaxPooling)
{
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().layoutPreference(_layout_preference)));
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND);
}
-TEST_P(myriadLayersTestsMaxOverlappedByKernel_nightly, MaxPooling)
+TEST_P(myriadLayersTestsMaxOverlappedByKernel_smoke, MaxPooling)
{
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().layoutPreference(_layout_preference)));
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND);
}
-TEST_P(myriadLayersTestsMaxPad4_nightly, MaxPoolingPad4)
+TEST_P(myriadLayersTestsMaxPad4_smoke, MaxPoolingPad4)
{
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().layoutPreference(_layout_preference)));
auto refBlob = getReferenceOutput();
CompareCommonAbsolute(_outputMap.begin()->second, refBlob, ERROR_BOUND);
}
-TEST_P(myriadLayersTestsAvg_nightly, AvgPooling)
+TEST_P(myriadLayersTestsAvg_smoke, AvgPooling)
{
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().layoutPreference(_layout_preference)));
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND);
}
-TEST_P(myriadLayersTestsAvgOverlappedByKernel_nightly, AvgPooling)
+TEST_P(myriadLayersTestsAvgOverlappedByKernel_smoke, AvgPooling)
{
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().layoutPreference(_layout_preference)));
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND);
}
-TEST_P(myriadLayersTestsAvgPad4_nightly, AvgPoolingPad4)
+TEST_P(myriadLayersTestsAvgPad4_smoke, AvgPoolingPad4)
{
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().layoutPreference(_layout_preference)));
auto refBlob = getReferenceOutput();
CompareCommonAbsolute(_outputMap.begin()->second, refBlob, ERROR_BOUND);
}
-TEST_P(myriadLayersTestsGlobalMax_nightly, GlobalMaxPooling)
+TEST_P(myriadLayersTestsGlobalMax_smoke, GlobalMaxPooling)
{
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams()));
auto refBlob = getReferenceOutput();
CompareCommonAbsolute(_outputMap.begin()->second, refBlob, ERROR_BOUND);
}
-TEST_P(myriadLayersTestsGlobalAvg_nightly, GlobalAvgPooling)
+TEST_P(myriadLayersTestsGlobalAvg_smoke, GlobalAvgPooling)
{
if(_pad_val.x != 0 || _pad_val.y != 0) {
GTEST_SKIP() << "paddings should not be exist for GlobalAvgPool";
//
#include "myriad_layers_power_test.hpp"
-INSTANTIATE_TEST_CASE_P( accuracy, myriadLayersTestsPowerParams_nightly,
+INSTANTIATE_TEST_CASE_P( accuracy, myriadLayersTestsPowerParams_smoke,
::testing::Combine(
::testing::ValuesIn(s_powerTensors),
::testing::ValuesIn(s_powerParams))
}
}
-typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, pwr_test_params>> myriadLayersTestsPowerParams_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, pwr_test_params>> myriadLayersTestsPowerParams_smoke;
-TEST_P(myriadLayersTestsPowerParams_nightly, TestsPower) {
+TEST_P(myriadLayersTestsPowerParams_smoke, TestsPower) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
auto param = GetParam();
SizeVector tensor = std::get<0>(param);
#include "myriad_layers_prelu_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy_PReLU, myriadLayerPReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_PReLU, myriadLayerPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(s_PReLUTensors)
, ::testing::Values<ChannelSharedPrelu>(0, 1)
);
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayerFullyConnectedWithPReLU_nightly,
+ accuracy, myriadLayerFullyConnectedWithPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_fcTestParamsSubset),
::testing::Values(g_dimensionsFC[0]),
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithPReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
::testing::ValuesIn(s_PReluLayerParams))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithPReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
::testing::ValuesIn(s_PReluLayerParams))
);
-INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsMaxPoolingWithPReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsMaxPoolingWithPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput_postOp),
::testing::Values<pooling_layer_params>(MAKE_STRUCT(pooling_layer_params, {3, 3}, {1, 1}, {1, 1})),
::testing::Values<PReLULayerDef>(MAKE_STRUCT(PReLULayerDef, {{{PRELU_PARAM, "0"}}})))
);
-INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsAvgPoolingWithPReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsAvgPoolingWithPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput_postOp),
::testing::Values<pooling_layer_params>(MAKE_STRUCT(pooling_layer_params, {3, 3}, {1, 1}, {1, 1})),
::testing::Values<PReLULayerDef>(MAKE_STRUCT(PReLULayerDef, {{{PRELU_PARAM, "0"}}})))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithPReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_convolutionTensors)
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayerConvolutionWithPReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayerConvolutionWithPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput_postOp)
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 3, 3))
using namespace InferenceEngine;
PRETTY_PARAM(ChannelSharedPrelu, int);
-typedef myriadLayerTestBaseWithParam<tuple<SizeVector, ChannelSharedPrelu >> myriadLayerPReLU_nightly;
+typedef myriadLayerTestBaseWithParam<tuple<SizeVector, ChannelSharedPrelu >> myriadLayerPReLU_smoke;
-TEST_P(myriadLayerPReLU_nightly, PReLU) {
+TEST_P(myriadLayerPReLU_smoke, PReLU) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
SizeVector dims = get<0>(GetParam());
{{{PRELU_PARAM, "1"}}}
};
-class myriadLayerFullyConnectedWithPReLU_nightly: public FCTest<PReLULayerDef>{
+class myriadLayerFullyConnectedWithPReLU_smoke: public FCTest<PReLULayerDef>{
};
#define TEST_BODY \
ref_PReLU_wrap);\
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams()));
-TEST_P(myriadLayerFullyConnectedWithPReLU_nightly, TestsFullyConnected)
+TEST_P(myriadLayerFullyConnectedWithPReLU_smoke, TestsFullyConnected)
{
auto p = ::testing::WithParamInterface<std::tuple<fcon_test_params, int32_t, int32_t, PReLULayerDef>>::GetParam();
auto extraLayerParams = std::get<3>(p);
#define ERROR_BOUND_WITH_RELU (4.e-3f)
-class myriadLayersTestsMaxPoolingWithPReLU_nightly: public PoolingTest<POOLING_MAX, PReLULayerDef>{
+class myriadLayersTestsMaxPoolingWithPReLU_smoke: public PoolingTest<POOLING_MAX, PReLULayerDef>{
};
-class myriadLayersTestsAvgPoolingWithPReLU_nightly: public PoolingTest<POOLING_AVG, PReLULayerDef>{
+class myriadLayersTestsAvgPoolingWithPReLU_smoke: public PoolingTest<POOLING_AVG, PReLULayerDef>{
};
-TEST_P(myriadLayersTestsMaxPoolingWithPReLU_nightly, TestsMaxPoolingWithPReLU)
+TEST_P(myriadLayersTestsMaxPoolingWithPReLU_smoke, TestsMaxPoolingWithPReLU)
{
auto p = ::testing::WithParamInterface<std::tuple<InferenceEngine::SizeVector, pooling_layer_params, vpu::LayoutPreference, PReLULayerDef>>::GetParam();
auto extraLayerParams = std::get<3>(p);
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_RELU);
}
-TEST_P(myriadLayersTestsAvgPoolingWithPReLU_nightly, TestsAvgPoolingWithPReLU)
+TEST_P(myriadLayersTestsAvgPoolingWithPReLU_smoke, TestsAvgPoolingWithPReLU)
{
auto p = ::testing::WithParamInterface<std::tuple<InferenceEngine::SizeVector, pooling_layer_params, vpu::LayoutPreference, PReLULayerDef>>::GetParam();
auto extraLayerParams = std::get<3>(p);
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_RELU);
}
-class myriadLayerConvolutionWithPReLU_nightly: public ConvolutionTest<PReLULayerDef>{
+class myriadLayerConvolutionWithPReLU_smoke: public ConvolutionTest<PReLULayerDef>{
};
-TEST_P(myriadLayerConvolutionWithPReLU_nightly, Convolution) {
+TEST_P(myriadLayerConvolutionWithPReLU_smoke, Convolution) {
auto p = ::testing::WithParamInterface<std::tuple<InferenceEngine::SizeVector, param_size, param_size, param_size, uint32_t, uint32_t, PReLULayerDef>>::GetParam();
auto extraLayerParams = std::get<6>(p);
TEST_BODY;
}
}
-class myriadLayersPriorBoxTests_nightly : public myriadLayersTests_nightly {
+class myriadLayersPriorBoxTests_smoke : public myriadLayersTests_nightly {
public:
Blob::Ptr getFp16Blob(const Blob::Ptr& in) {
if (in->getTensorDesc().getPrecision() == Precision::FP16)
}
};
-TEST_F(myriadLayersPriorBoxTests_nightly, NotLastLayer)
+TEST_F(myriadLayersPriorBoxTests_smoke, NotLastLayer)
{
std::string model = R"V0G0N(
<net name="PriorBox" version="2" batch="1">
RunOnModel(model, "priorbox_copy");
}
-TEST_F(myriadLayersPriorBoxTests_nightly, LastLayer_FP16)
+TEST_F(myriadLayersPriorBoxTests_smoke, LastLayer_FP16)
{
std::string model = R"V0G0N(
<net name="PriorBox" version="2" batch="1">
RunOnModel(model, "priorbox", Precision::FP16);
}
-TEST_F(myriadLayersPriorBoxTests_nightly, LastLayer_FP32)
+TEST_F(myriadLayersPriorBoxTests_smoke, LastLayer_FP32)
{
std::string model = R"V0G0N(
<net name="PriorBox" version="2" batch="1">
CompareCommonAbsolute(_refBlob, outputBlob, 0.0);
}
-TEST_F(myriadLayersPriorBoxTests_nightly, FaceBoxLayer)
+TEST_F(myriadLayersPriorBoxTests_smoke, FaceBoxLayer)
{
std::string model = R"V0G0N(
<net name="PriorBox" version="2" batch="1">
RunOnModelWithParams(model, "priorbox", params, Precision::FP16);
}
-TEST_F(myriadLayersPriorBoxTests_nightly, TwoPriorBoxLayersWithUnusedInput)
+TEST_F(myriadLayersPriorBoxTests_smoke, TwoPriorBoxLayersWithUnusedInput)
{
std::string model = R"V0G0N(
<net name="PriorBox" version="2" batch="1">
#define OUTPUT_SAMPLING_NUM (20) // Validate only top 20 rois
#define OUTPUT_ROI_MATCH_THRESHOLD (18) // At least 18 rois should be matched
-class myriadLayersTestsProposal_nightly : public myriadLayersTests_nightly {
+class myriadLayersTestsProposal_smoke : public myriadLayersTests_nightly {
protected:
std::string model;
)V0G0N";
}
-TEST_F(myriadLayersTestsProposal_nightly, Caffe) {
+TEST_F(myriadLayersTestsProposal_smoke, Caffe) {
// Verify only 20 ranked proposal output with GT values
std::vector<float> gt_values = {
ASSERT_NO_FATAL_FAILURE(compareOutputSampleToRef(gt_values, 0.26f));
}
-TEST_F(myriadLayersTestsProposal_nightly, CaffeNoClipBeforeNms) {
+TEST_F(myriadLayersTestsProposal_smoke, CaffeNoClipBeforeNms) {
// Verify only 20 ranked proposal output with GT values - reference get from MKLDNN plugin
std::vector<float> gt_values = {
ASSERT_NO_FATAL_FAILURE(compareOutputSampleToRef(gt_values, 0.26f));
}
-TEST_F(myriadLayersTestsProposal_nightly, CaffeClipAfterNms) {
+TEST_F(myriadLayersTestsProposal_smoke, CaffeClipAfterNms) {
// Verify only 20 ranked proposal output with GT values
std::vector<float> gt_values = {
ASSERT_NO_FATAL_FAILURE(compareOutputSampleToRef(gt_values, 0.26f));
}
-TEST_F(myriadLayersTestsProposal_nightly, CaffeNormalizedOutput) {
+TEST_F(myriadLayersTestsProposal_smoke, CaffeNormalizedOutput) {
// Verify only 20 ranked proposal output with GT values
std::vector<float> gt_values = {
ASSERT_NO_FATAL_FAILURE(compareOutputSampleToRef(gt_values, 0.026f));
}
-TEST_F(myriadLayersTestsProposal_nightly, TensorFlow) {
+TEST_F(myriadLayersTestsProposal_smoke, TensorFlow) {
model = R"V0G0N(
<net name="testProposal" version="2" batch="1">
#include "myriad_layers_psroipooling_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsPSROIPooling_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsPSROIPooling_smoke,
::testing::Combine(
::testing::ValuesIn(s_PSROIPoolingLayerInput),
::testing::ValuesIn(s_PSROIPoolingLayerParam),
using PSROIPoolingTestParams = std::tuple<Dims, psroipooling_param, uint32_t>;
-class myriadLayersTestsPSROIPooling_nightly: public myriadLayerTestBaseWithParam<PSROIPoolingTestParams> {
+class myriadLayersTestsPSROIPooling_smoke: public myriadLayerTestBaseWithParam<PSROIPoolingTestParams> {
public:
void genROIs(InferenceEngine::Blob::Ptr rois,
const PSROIPoolingParams& params,
1, 10, 30, 50, 100, 300
};
-TEST_P(myriadLayersTestsPSROIPooling_nightly, PSROIPooling) {
+TEST_P(myriadLayersTestsPSROIPooling_smoke, PSROIPooling) {
#if defined(_WIN32) || defined(WIN32)
SKIP() << "Disabled for Windows. CVS-13239";
#endif
#include "myriad_layers_reduce_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceAnd_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceAnd_smoke,
::testing::Combine(
- ::testing::ValuesIn(s_input_dims),
+ ::testing::ValuesIn(s_input_pair),
::testing::ValuesIn(s_axes_list),
::testing::ValuesIn(s_data_precision),
::testing::ValuesIn(s_keep_dims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceMin_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceMin_smoke,
::testing::Combine(
- ::testing::ValuesIn(s_input_dims),
+ ::testing::ValuesIn(s_input_pair),
::testing::ValuesIn(s_axes_list),
::testing::ValuesIn(s_data_precision),
::testing::ValuesIn(s_keep_dims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceMax_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceMax_smoke,
::testing::Combine(
- ::testing::ValuesIn(s_input_dims),
+ ::testing::ValuesIn(s_input_pair),
::testing::ValuesIn(s_axes_list),
::testing::ValuesIn(s_data_precision),
::testing::ValuesIn(s_keep_dims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceSum_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceSum_smoke,
::testing::Combine(
- ::testing::ValuesIn(s_input_dims),
+ ::testing::ValuesIn(s_input_pair),
::testing::ValuesIn(s_axes_list),
::testing::ValuesIn(s_data_precision),
::testing::ValuesIn(s_keep_dims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceMean_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceMean_smoke,
::testing::Combine(
- ::testing::ValuesIn(s_input_dims),
+ ::testing::ValuesIn(s_input_pair),
::testing::ValuesIn(s_axes_list),
::testing::ValuesIn(s_data_precision),
::testing::ValuesIn(s_keep_dims))
-);
+);
\ No newline at end of file
{REDUCE_MEAN, {&refReduceMeanI32, 0.0f, RefReduceMean<int32_t>::generateData}},
};
-using ReduceTestParams = std::tuple<SizeVector, SizeVector, Precision, bool>;
+using ReduceTestParams = std::tuple<std::pair<SizeVector, vpu::LayoutPreference>, SizeVector, Precision, bool>;
static const Precision axesPrecision = Precision::I32;
for (int i : list)
{
if (i < 0) // handle negative indices
- i = ndims - i;
+ i = ndims - std::abs(i);
EXPECT_TRUE((i >= 0) && (i < ndims));
mask |= (1 << i);
}
return mask;
}
- static Layout defaultLayout(int ndims)
- {
- switch (ndims)
- {
- case 5: return NCDHW;
- case 4: return NCHW;
- case 3: return CHW;
- case 2: return NC;
- case 1: return C;
- }
- return ANY;
- }
+
static void getAxesBlob(const SizeVector& axesList, TBlob<uint8_t>::Ptr& weightsBlob, TBlob<int32_t>::Ptr& axesBlob)
{
size_t axes_size = axesList.size();
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
const auto params = GetParam();
- const auto inputDims = std::get<0>(params);
- const auto axesList = std::get<1>(params);
+ const auto inputPair = std::get<0>(params);
+ auto axesList = std::get<1>(params);
const auto dataPrecision = std::get<2>(params);
const int keepDims = std::get<3>(params) ? 1 : 0;
+ const auto inputDims = inputPair.first;
+ const auto layoutPreference = inputPair.second;
+
const auto outputDims = ReduceUtils::calcOutputDims(inputDims, axesList, keepDims);
const auto model = ReduceUtils::getModel(inputDims, axesList, outputDims, ReduceType, dataPrecision, keepDims);
TBlob<int32_t>::Ptr axesBlob;
ReduceUtils::getAxesBlob(axesList, weightsBlob, axesBlob);
ASSERT_NE(weightsBlob, nullptr);
-
+
ASSERT_NO_THROW(readNetwork(model, weightsBlob));
const auto& network = _cnnNetwork;
_inputsInfo = network.getInputsInfo();
_inputsInfo["reduce_input"]->setPrecision(dataPrecision);
- _inputsInfo["reduce_input"]->setLayout(ReduceUtils::defaultLayout(inputDims.size()));
+ _inputsInfo["reduce_input"]->setLayout(vpu::deviceLayout(TensorDesc::getLayoutByDims(inputDims), layoutPreference));
_outputsInfo = network.getOutputsInfo();
_outputsInfo["reduce"]->setPrecision(dataPrecision);
- _outputsInfo["reduce"]->setLayout(ReduceUtils::defaultLayout(outputDims.size()));
-
+ _outputsInfo["reduce"]->setLayout(vpu::deviceLayout(TensorDesc::getLayoutByDims(outputDims), layoutPreference));
StatusCode st = OK;
ASSERT_NO_THROW(st = _vpuPluginPtr->LoadNetwork(_exeNetwork, network, _config, &_resp));
refBlob = make_shared_blob<ie_fp16>(outputBlob->getTensorDesc());
refBlob->allocate();
- ref_reduce(inputBlob, axesBlob, refBlob, keepDims, reduceOp);
+ ref_reduce(inputBlob, axesBlob, refBlob, keepDims, layoutPreference, reduceOp);
CompareCommonAbsolute(outputBlob, refBlob, compareThreshold);
- } else if (dataPrecision == Precision::I32) {
- auto opIt = refMapI32.find(ReduceType);
- ASSERT_TRUE(opIt != refMapI32.end());
- auto reduceOp = opIt->second.op;
- auto generateData = opIt->second.generateData;
- generateData(inputBlob);
-
- ASSERT_NO_THROW(st = _inferRequest->Infer(&_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
-
- refBlob = make_shared_blob<int32_t>(outputBlob->getTensorDesc());
- refBlob->allocate();
- ref_reduce(inputBlob, axesBlob, refBlob, keepDims, reduceOp);
- CompareCommonExact(outputBlob, refBlob);
- }
+ } else if (dataPrecision == Precision::I32) {
+ auto opIt = refMapI32.find(ReduceType);
+ ASSERT_TRUE(opIt != refMapI32.end());
+ auto reduceOp = opIt->second.op;
+ auto generateData = opIt->second.generateData;
+ generateData(inputBlob);
+
+ ASSERT_NO_THROW(st = _inferRequest->Infer(&_resp));
+ ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
+
+ refBlob = make_shared_blob<int32_t>(outputBlob->getTensorDesc());
+ refBlob->allocate();
+ ref_reduce(inputBlob, axesBlob, refBlob, keepDims, layoutPreference, reduceOp);
+ CompareCommonExact(outputBlob, refBlob);
+ }
}
};
-class myriadTestsReduceAnd_nightly: public ReduceTest<REDUCE_AND>
+class myriadTestsReduceAnd_smoke: public ReduceTest<REDUCE_AND>
{
};
-class myriadTestsReduceMin_nightly: public ReduceTest<REDUCE_MIN>
+class myriadTestsReduceMin_smoke: public ReduceTest<REDUCE_MIN>
{
};
-class myriadTestsReduceMax_nightly: public ReduceTest<REDUCE_MAX>
+class myriadTestsReduceMax_smoke: public ReduceTest<REDUCE_MAX>
{
};
-class myriadTestsReduceSum_nightly: public ReduceTest<REDUCE_SUM>
+class myriadTestsReduceSum_smoke: public ReduceTest<REDUCE_SUM>
{
};
-class myriadTestsReduceMean_nightly: public ReduceTest<REDUCE_MEAN>
+class myriadTestsReduceMean_smoke: public ReduceTest<REDUCE_MEAN>
{
};
// Tests are disabled due to hang: #-28315
-TEST_P(myriadTestsReduceAnd_nightly, And)
+TEST_P(myriadTestsReduceAnd_smoke, And)
{
testReduce();
}
-TEST_P(myriadTestsReduceMin_nightly, Min)
+TEST_P(myriadTestsReduceMin_smoke, Min)
{
testReduce();
}
-TEST_P(myriadTestsReduceMax_nightly, Max)
+TEST_P(myriadTestsReduceMax_smoke, Max)
{
testReduce();
}
-TEST_P(myriadTestsReduceSum_nightly, Sum)
+TEST_P(myriadTestsReduceSum_smoke, Sum)
{
testReduce();
}
-TEST_P(myriadTestsReduceMean_nightly, Mean)
+TEST_P(myriadTestsReduceMean_smoke, Mean)
{
testReduce();
}
-static const std::vector<SizeVector> s_input_dims =
+static const std::vector<std::pair<SizeVector, vpu::LayoutPreference>> s_input_pair =
{
- {1, 3, 2, 14, 32},
- {2, 2, 2, 14, 32},
- {3, 5, 4, 8, 16},
- {4, 2, 16, 16, 8},
-
- {3, 2, 14, 32},
- {2, 2, 14, 32},
- {5, 4, 8, 16},
- {2, 16, 16, 8},
-
- {3, 2, 14},
- {2, 2, 14},
- {5, 4, 8},
- {2, 16, 16},
-
- { 7, 3, 5, 1, 7, 11, 12},
+ {{1, 3, 2, 14, 32}, vpu::LayoutPreference::ChannelMinor},
+ {{1, 3, 2, 14, 32}, vpu::LayoutPreference::ChannelMajor},
+ {{2, 2, 2, 14, 32}, vpu::LayoutPreference::ChannelMinor},
+ {{2, 2, 2, 14, 32}, vpu::LayoutPreference::ChannelMajor},
+ {{3, 5, 4, 8, 16}, vpu::LayoutPreference::ChannelMinor},
+ {{3, 5, 4, 8, 16}, vpu::LayoutPreference::ChannelMajor},
+ {{4, 2, 16, 16, 8}, vpu::LayoutPreference::ChannelMinor},
+ {{4, 2, 16, 16, 8}, vpu::LayoutPreference::ChannelMajor},
+
+ {{3, 2, 14, 32}, vpu::LayoutPreference::ChannelMinor},
+ {{3, 2, 14, 32}, vpu::LayoutPreference::ChannelMajor},
+ {{2, 2, 14, 32}, vpu::LayoutPreference::ChannelMinor},
+ {{2, 2, 14, 32}, vpu::LayoutPreference::ChannelMajor},
+ {{5, 4, 8, 16}, vpu::LayoutPreference::ChannelMinor},
+ {{5, 4, 8, 16}, vpu::LayoutPreference::ChannelMajor},
+ {{2, 16, 16, 8}, vpu::LayoutPreference::ChannelMinor},
+ {{2, 16, 16, 8}, vpu::LayoutPreference::ChannelMajor},
+
+ {{3, 2, 14}, vpu::LayoutPreference::ChannelMajor},
+ {{2, 2, 14}, vpu::LayoutPreference::ChannelMajor},
+ {{5, 4, 8}, vpu::LayoutPreference::ChannelMajor},
+ {{2, 16, 16}, vpu::LayoutPreference::ChannelMajor},
+
+ {{7, 3, 5, 1, 7, 11, 12}, vpu::LayoutPreference::ChannelMajor},
};
static const std::vector<SizeVector> s_axes_list =
#include "myriad_layers_region_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayerRegionYolo_nightly,
- ::testing::ValuesIn(s_regionData)
-);
-
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsRegion_CHW_HW_nightly,
- ::testing::Combine(
- ::testing::Values<InferenceEngine::SizeVector>({1, 125, 13, 13})
- , ::testing::Values<param_size>(MAKE_STRUCT(param_size, 1, 1))
- , ::testing::Values<param_size>(MAKE_STRUCT(param_size, 1, 1))
- , ::testing::Values<param_size>(MAKE_STRUCT(param_size, 0, 0))
- , ::testing::Values<uint32_t>(125)
- , ::testing::Values<uint32_t>(1)
- )
-);
-
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsRegion_CHW_HW_80cl_nightly,
- ::testing::Combine(
- ::testing::Values<InferenceEngine::SizeVector>({1, 425, 13, 13})
- , ::testing::Values<param_size>(MAKE_STRUCT(param_size, 1, 1))
- , ::testing::Values<param_size>(MAKE_STRUCT(param_size, 1, 1))
- , ::testing::Values<param_size>(MAKE_STRUCT(param_size, 0, 0))
- , ::testing::Values<uint32_t>(425)
- , ::testing::Values<uint32_t>(1)
- )
-);
-
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerRegionYolo_CHW_nightly,
- ::testing::ValuesIn(s_classes)
-);
+ accuracy, myriadLayersTestsRegionYolo_smoke,
+ ::testing::Combine(
+ ::testing::Values<Coords>(4),
+ ::testing::Values<Classes>(20, 80),
+ ::testing::Values<Num>(5, 10),
+ ::testing::Values<MaskSize>(3),
+ ::testing::Values<DoSoftmax>(1, 0),
+ ::testing::Values(vpu::LayoutPreference::ChannelMajor, vpu::LayoutPreference::ChannelMinor),
+ ::testing::Values(IRVersion::v7, IRVersion::v10),
+ ::testing::ValuesIn(s_CustomConfig)
+));
#include <gtest/gtest.h>
#include "myriad_layers_tests.hpp"
-#include <math.h>
+#include <cmath>
using namespace InferenceEngine;
-struct region_test_params {
- tensor_test_params in;
- int coords;
- int classes;
- int num;
- int maskSize;
- int doSoftMax;
- std::string customLayers;
- friend std::ostream& operator<<(std::ostream& os, region_test_params const& tst)
- {
- return os << "tensor (" << tst.in
- << "),coords=" << tst.coords
- << ", classes=" << tst.classes
- << ", num=" << tst.num
- << ", maskSize=" << tst.maskSize
- << ", doSoftMax=" << tst.doSoftMax
- << ", by using custom layer=" << (tst.customLayers.empty() ? "no" : "yes");
- };
-};
-
-class myriadLayerRegionYolo_nightly: public myriadLayersTests_nightly,
- public testing::WithParamInterface<region_test_params> {
-};
-
-TEST_P(myriadLayerRegionYolo_nightly, BaseTestsRegion) {
- region_test_params p = ::testing::WithParamInterface<region_test_params>::GetParam();
-
- // TODO: M2 mode is not working for OpenCL compiler
- if(!p.customLayers.empty() && !CheckMyriadX()) {
- GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
- }
-
- std::map<std::string, std::string> params;
-
- params["coords"] = std::to_string(p.coords);
- params["classes"] = std::to_string(p.classes);
- params["num"] = std::to_string(p.num);
- params["mask"] = "0,1,2";
- params["do_softmax"] = std::to_string(p.doSoftMax);
-
- InferenceEngine::SizeVector tensor;
- tensor.resize(4);
- tensor[3] = p.in.w;
- tensor[2] = p.in.h;
- tensor[1] = p.in.c;
- tensor[0] = 1;
- _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = p.customLayers;
- _testNet.addLayer(LayerInitParams("RegionYolo")
- .params(params)
- .in({tensor})
- .out({tensor}),
- ref_RegionYolo_wrap);
- ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().layoutPreference(vpu::LayoutPreference::ChannelMinor)));
- CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), 0.0025);
-}
-
-static std::vector<region_test_params> s_regionData = {
- region_test_params{{1, (4+20+1)*5, 13, 13}, 4, 20, 5, 3, 1, ""},
- region_test_params{{1, (4+80+1)*5, 13, 13}, 4, 80, 5, 3, 1, ""},
- region_test_params{{1, (4+20+1)*3, 13, 13}, 4, 20, 9, 3, 0, ""},
- region_test_params{{1, (4+80+1)*3, 13, 13}, 4, 80, 9, 3, 0, ""},
-
-#ifdef VPU_HAS_CUSTOM_KERNELS
- region_test_params{{1, (4+20+1)*5, 13, 13}, 4, 20, 5, 3, 1, getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"},
- region_test_params{{1, (4+80+1)*5, 13, 13}, 4, 80, 5, 3, 1, getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"},
- region_test_params{{1, (4+20+1)*3, 13, 13}, 4, 20, 9, 3, 0, getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"},
- region_test_params{{1, (4+80+1)*3, 13, 13}, 4, 80, 9, 3, 0, getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"},
-#endif
-};
-
-/* HW network needs to be created to test strides influence to RegionYolo input */
-/* so convolution layer added as the first layer to this test */
-class myriadLayersTestsRegion_CHW_HW_nightly: public ConvolutionTest<>{
-};
-
-/*80 input classes */
-class myriadLayersTestsRegion_CHW_HW_80cl_nightly: public ConvolutionTest<>{
-};
-
-/* to passthrough "original" data */
-template<size_t width>
-void constWeightsRange(uint16_t* ptr, size_t weightsSize) {
- ASSERT_NE(ptr, nullptr);
- ASSERT_EQ(weightsSize, width * width);
- std::memset(ptr, 0, sizeof(uint16_t) * (weightsSize));
- for (int i = 0; i < weightsSize/width; ++i) {
- ptr[i * width + i] = PrecisionUtils::f32tof16(1.0f);
- }
-}
-
-void constBiasesRange(uint16_t* ptr, size_t weightsSize) {
- std::memset(ptr, 0, sizeof(uint16_t) * (weightsSize));
-}
-
-void loadData(InferenceEngine::Blob::Ptr blob) {
- /* input blob has predefined size and CHW layout */
- ASSERT_NE(blob, nullptr);
- auto inDims = blob->getTensorDesc().getDims();
- InferenceEngine::Blob::Ptr inputBlobRef =
- InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, inDims, InferenceEngine::NCHW});
- inputBlobRef->allocate();
- const float* ref_values = inputBlobRef->buffer();
-
- std::string inputTensorBinary = TestDataHelpers::get_data_path();
- inputTensorBinary += "/vpu/InputYoLoV2Tiny.bin";
- ASSERT_TRUE(fromBinaryFile(inputTensorBinary, inputBlobRef));
- uint16_t *inputBlobRawDataFp16 = static_cast<uint16_t *>(blob->buffer());
- ASSERT_NE(inputBlobRawDataFp16, nullptr);
-
- switch(blob->getTensorDesc().getLayout()) {
- case InferenceEngine::NCHW:
- for (int indx = 0; indx < blob->size(); indx++) {
- inputBlobRawDataFp16[indx] = PrecisionUtils::f32tof16(ref_values[indx]);
- }
- break;
- case InferenceEngine::NHWC:
- for (int h = 0 ; h < inDims[2]; ++h) {
- for (int w = 0 ; w < inDims[3]; ++w) {
- for (int c = 0 ; c < inDims[1]; ++c) {
- int src_i = w + inDims[3] * h + inDims[3] * inDims[2] * c;
- int dst_i = c + inDims[1] * w + inDims[3] * inDims[1] * h;
- inputBlobRawDataFp16[dst_i] = PrecisionUtils::f32tof16(ref_values[src_i]);
- }
- }
- }
- break;
- default:
- FAIL() << "unsupported layout: " << blob->getTensorDesc().getLayout();
+#define ERROR_BOUND 0.0005f
+
+PRETTY_PARAM(Coords, int)
+PRETTY_PARAM(Classes, int)
+PRETTY_PARAM(Num, int)
+PRETTY_PARAM(MaskSize, int)
+PRETTY_PARAM(DoSoftmax, int)
+PRETTY_PARAM(CustomConfig, std::string)
+
+typedef myriadLayerTestBaseWithParam<std::tuple<Coords, Classes, Num, MaskSize, DoSoftmax,
+ vpu::LayoutPreference, IRVersion, CustomConfig>> myriadLayersTestsRegionYolo_smoke;
+
+TEST_P(myriadLayersTestsRegionYolo_smoke, RegionYolo) {
+ const int coords = std::get<0>(GetParam());
+ const int classes = std::get<1>(GetParam());
+ const int num = std::get<2>(GetParam());
+ const int maskSize = std::get<3>(GetParam());
+ const int doSoftmax = std::get<4>(GetParam());
+ const auto layoutPreference = std::get<5>(GetParam());
+ _irVersion = std::get<6>(GetParam());
+ const std::string customConfig = std::get<7>(GetParam());
+
+ if (!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
}
-}
-
-void loadData_80cl(InferenceEngine::Blob::Ptr blob) {
- /* input blob has predefined size and CHW layout */
- ASSERT_NE(blob, nullptr);
- auto inDims = blob->getTensorDesc().getDims();
- InferenceEngine::Blob::Ptr inputBlobRef =
- InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, inDims, InferenceEngine::NCHW});
- inputBlobRef->allocate();
- const float* ref_values = inputBlobRef->buffer();
- std::string inputTensorBinary = TestDataHelpers::get_data_path();
- inputTensorBinary += "/vpu/InputYoLoV2_80cl.bin";
- ASSERT_TRUE(fromBinaryFile(inputTensorBinary, inputBlobRef));
- uint16_t *inputBlobRawDataFp16 = static_cast<uint16_t *>(blob->buffer());
- ASSERT_NE(inputBlobRawDataFp16, nullptr);
+ _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- switch(blob->getTensorDesc().getLayout()) {
- case InferenceEngine::NCHW:
- for (int indx = 0; indx < blob->size(); indx++) {
- inputBlobRawDataFp16[indx] = PrecisionUtils::f32tof16(ref_values[indx]);
+ const auto mask = [&] {
+ std::string mask;
+ for (int i = 0; i < maskSize; i++) {
+ mask += std::to_string(i) + ',';
}
- break;
- case InferenceEngine::NHWC:
- for (int h = 0 ; h < inDims[2]; ++h) {
- for (int w = 0 ; w < inDims[3]; ++w) {
- for (int c = 0 ; c < inDims[1]; ++c) {
- int src_i = w + inDims[3] * h + inDims[3] * inDims[2] * c;
- int dst_i = c + inDims[1] * w + inDims[3] * inDims[1] * h;
- inputBlobRawDataFp16[dst_i] = PrecisionUtils::f32tof16(ref_values[src_i]);
- }
- }
- }
- break;
- default:
- FAIL() << "unsupported layout: " << blob->getTensorDesc().getLayout();
- }
-}
-
-TEST_P(myriadLayersTestsRegion_CHW_HW_nightly, RegionYolo) {
- std::map<std::string, std::string> params;
- params["coords"] = "4";
- params["classes"] = "20";
- params["num"] = "5";
- params["mask"] = std::string("0,1,2");
- params["do_softmax"] = "1";
- _testNet.addLayer(LayerInitParams("RegionYolo")
- .params(params)
- .in({_output_tensor})
- .out({{1, _output_tensor[0] * _output_tensor[1] * _output_tensor[2] * _output_tensor[3]}}),
- ref_RegionYolo_wrap);
- _testNet.setWeightsCallbackForLayer(0, constWeightsRange<125>);
- _testNet.setBiasesCallbackForLayer(0, constBiasesRange);
- _genDataCallback = loadData;
- ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().useHWOpt(true)));
- CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), 0.0035);
-}
-
-TEST_P(myriadLayersTestsRegion_CHW_HW_80cl_nightly, RegionYolol) {
- std::map<std::string, std::string> params;
- params["coords"] = "4";
- params["classes"] = "80";
- params["num"] = "5";
- params["mask"] = std::string("0,1,2");
- params["do_softmax"] = "1";
- _testNet.addLayer(LayerInitParams("RegionYolo")
- .params(params)
- .in({_output_tensor})
- .out({{1, _output_tensor[0] * _output_tensor[1] * _output_tensor[2] * _output_tensor[3]}}),
- ref_RegionYolo_wrap);
- _testNet.setWeightsCallbackForLayer(0, constWeightsRange<425>);
- _testNet.setBiasesCallbackForLayer(0, constBiasesRange);
- _genDataCallback = loadData_80cl;
- ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().useHWOpt(true)));
- CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), 0.0060);
-}
-
-class myriadLayerRegionYolo_CHW_nightly: public myriadLayersTests_nightly,
- public testing::WithParamInterface<int> {
-};
-
-TEST_P(myriadLayerRegionYolo_CHW_nightly, TestsRegion) {
- auto classes = GetParam();
- InferenceEngine::SizeVector input_dims = {1, 125, 13, 13};
- if (classes == 80) {
- input_dims[1] = 425;
- }
- IN_OUT_desc input_tensor;
- input_tensor.push_back(input_dims);
+ if (!mask.empty()) mask.pop_back();
+ return mask;
+ }();
std::map<std::string, std::string> params;
- params["coords"] = "4";
+ params["coords"] = std::to_string(coords);
params["classes"] = std::to_string(classes);
- params["num"] = "5";
- params["mask"] = std::string("0,1,2");
- params["do_softmax"] = "1";
- _testNet.addLayer(LayerInitParams("RegionYolo")
- .params(params)
- .in(input_tensor)
- .out({{1, input_dims[0] * input_dims[1] * input_dims[2] * input_dims[3]}}),
- ref_RegionYolo_wrap);
- _genDataCallback = loadData;
- if (classes == 80) {
- _genDataCallback = loadData_80cl;
- }
- ASSERT_TRUE(generateNetAndInfer(NetworkInitParams()));
- /* bound is too high , set for M2 tests */
- CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), 0.006);
-}
+ params["num"] = std::to_string(num);
+ params["mask"] = mask;
+ params["do_softmax"] = std::to_string(doSoftmax);
+ params["axis"] = "0";
+ params["end_axis"] = "0";
-TEST_P(myriadLayerRegionYolo_CHW_nightly, Test_CHW_HWC_Compare) {
- auto classes = GetParam();
- IN_OUT_desc input_tensor;
- InferenceEngine::SizeVector input_dims = {1, 125, 13, 13};
- if (classes == 80) {
- input_dims[1] = 425;
- }
+ const auto dims = [&] {
+ const auto regions = doSoftmax ? num : maskSize;
+ const uint32_t channels = (coords + classes + 1) * regions;
+ IE_ASSERT(channels > 0);
+ return tensor_test_params{1, channels, 13, 13};
+ }();
- input_tensor.push_back(input_dims);
+ SetInputTensor(dims);
+ SetOutputTensor(dims);
- std::map<std::string, std::string> params;
- params["coords"] = "4";
- params["classes"] = std::to_string(classes);
- params["num"] = "5";
- params["mask"] = std::string("0,1,2");
- params["do_softmax"] = "1";
- _testNet.addLayer(LayerInitParams("RegionYolo")
- .params(params)
- .in(input_tensor)
- .out({{1, input_dims[0] * input_dims[1] * input_dims[2] * input_dims[3]}}),
- ref_RegionYolo_wrap);
- if (classes == 80) {
- _genDataCallback = loadData_80cl;
- }
- _config[VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION)] = CONFIG_VALUE(NO);
- ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().useHWOpt(false).runRefGraph(false)));
- /* create NHWC version */
- /* we cannot use the same generateNetAndInfer call due */
- /* to IE bug. */
- InferenceEngine::InputsDataMap inputsInfo;
- InferenceEngine::BlobMap outputMap;
- InferenceEngine::OutputsDataMap outputsInfo;
- InferenceEngine::IExecutableNetwork::Ptr exeNetwork;
- InferenceEngine::IInferRequest::Ptr inferRequest;
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("RegionYolo").params(params),
+ NetworkInitParams()
+ .layoutPreference(layoutPreference)
+ .lockLayout(true)));
- _inputsInfo.begin()->second->setLayout(NHWC);
- _outputsInfo.begin()->second->setLayout(NC);
+ ASSERT_TRUE(Infer());
- InferenceEngine::StatusCode st = InferenceEngine::StatusCode::GENERAL_ERROR;
- ASSERT_NO_THROW(st = _vpuPluginPtr->LoadNetwork(exeNetwork, _cnnNetwork, _config, &_resp));
- ASSERT_NE(exeNetwork, nullptr) << _resp.msg;
- ASSERT_NO_THROW(exeNetwork->CreateInferRequest(inferRequest, &_resp)) << _resp.msg;
- ASSERT_NE(inferRequest, nullptr) << _resp.msg;
- ASSERT_NO_THROW(inputsInfo = _cnnNetwork.getInputsInfo());
- auto inIt = _inputsInfo.begin();
- for (auto in = _inputsInfo.begin(); in != _inputsInfo.end(); in++) {
- Blob::Ptr inpt;
- ASSERT_NO_THROW(_inferRequest->GetBlob(inIt->first.c_str(), inpt, &_resp));
- ASSERT_NO_THROW(inferRequest->SetBlob(inIt->first.c_str(), inpt, &_resp));
- ++inIt;
- }
- ASSERT_NO_THROW(outputsInfo = _cnnNetwork.getOutputsInfo());
- auto outIt = _outputsInfo.begin();
- for (auto outputInfo : outputsInfo) {
- outputInfo.second->setPrecision(outIt->second->getTensorDesc().getPrecision());
- InferenceEngine::SizeVector outputDims = outputInfo.second->getTensorDesc().getDims();
- Blob::Ptr outputBlob = nullptr;
- Layout layout = outIt->second->getTensorDesc().getLayout();
- // work only with NHWC layout if size of the input dimensions == NHWC
- switch (outputInfo.second->getPrecision()) {
- case Precision::FP16:
- outputBlob = InferenceEngine::make_shared_blob<ie_fp16>({Precision::FP16, outputDims, layout});
- break;
- case Precision::FP32:
- outputBlob = InferenceEngine::make_shared_blob<float>({Precision::FP32, outputDims, layout});
- break;
- default:
- THROW_IE_EXCEPTION << "Unsupported precision for output. Supported FP16, FP32";
- }
- outputBlob->allocate();
- st = inferRequest->SetBlob(outputInfo.first.c_str(), outputBlob, &_resp);
- outputMap[outputInfo.first] = outputBlob;
- ASSERT_EQ((int) InferenceEngine::StatusCode::OK, st) << _resp.msg;
- ++outIt;
- }
- ASSERT_EQ(inferRequest->Infer(&_resp), InferenceEngine::OK);
- /* bound is too high !!!! investigation TBD */
- CompareCommonAbsolute(_outputMap.begin()->second, outputMap.begin()->second, 0.001);
+ ASSERT_NO_FATAL_FAILURE(ref_RegionYolo(_inputMap.begin()->second, _refBlob,
+ coords, classes, num, maskSize, doSoftmax));
+
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, ERROR_BOUND);
}
-const std::vector<int> s_classes = {20, 80};
+std::vector<CustomConfig> s_CustomConfig = {
+ {""},
+#ifdef VPU_HAS_CUSTOM_KERNELS
+ getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"
+#endif
+};
\ No newline at end of file
#include "myriad_layers_relu_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerReLU_smoke,
::testing::Combine(
::testing::ValuesIn(s_copyTensors),
::testing::ValuesIn(s_reluLayerParams)
);
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayerFullyConnectedWithReLU_nightly,
+ accuracy, myriadLayerFullyConnectedWithReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_fcTestParamsSubset),
::testing::Values(g_dimensionsFC[0]),
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
::testing::ValuesIn(s_reluLayerParams))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
::testing::ValuesIn(s_reluLayerParams))
);
-INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsMaxPoolingWithReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsMaxPoolingWithReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput_postOp),
::testing::Values<pooling_layer_params>(MAKE_STRUCT(pooling_layer_params, {3, 3}, {1, 1}, {1, 1})),
::testing::Values<ReLULayerDef>(MAKE_STRUCT(ReLULayerDef, {{{"negative_slope", "0.0"}}})))
);
-INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsAvgPoolingWithReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsAvgPoolingWithReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput_postOp),
::testing::Values<pooling_layer_params>(MAKE_STRUCT(pooling_layer_params, {3, 3}, {1, 1}, {1, 1})),
::testing::Values<ReLULayerDef>(MAKE_STRUCT(ReLULayerDef, {{{"negative_slope", "0.0"}}})))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_convolutionTensors)
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayerConvolutionWithReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayerConvolutionWithReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput_postOp)
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 3, 3))
const std::string relu_param = "negative_slope";
-class myriadLayersTestsReLUMergeWithBias_nightly : public myriadLayersTests_nightly {
+class myriadLayersTestsReLUMergeWithBias_smoke : public myriadLayersTests_nightly {
public:
void RunTest(const std::string& model, size_t num_weights, size_t num_bias) {
StatusCode st;
{{{"negative_slope", "0.1"}}},
};
-typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, ReLULayerDef>> myriadLayerReLU_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, ReLULayerDef>> myriadLayerReLU_smoke;
-TEST_P(myriadLayerReLU_nightly, ReLU) {
+TEST_P(myriadLayerReLU_smoke, ReLU) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
auto input_dims = std::get<0>(GetParam());
auto extraLayerParams = std::get<1>(GetParam());
},
};
-class myriadLayerFullyConnectedWithReLU_nightly: public FCTest<ReLULayerDef>{
+class myriadLayerFullyConnectedWithReLU_smoke: public FCTest<ReLULayerDef>{
};
-TEST_P(myriadLayerFullyConnectedWithReLU_nightly, TestsFullyConnected)
+TEST_P(myriadLayerFullyConnectedWithReLU_smoke, TestsFullyConnected)
{
auto p = ::testing::WithParamInterface<std::tuple<fcon_test_params, int32_t, int32_t, ReLULayerDef>>::GetParam();
auto extraLayerParams = std::get<3>(p);
#define ERROR_BOUND_WITH_RELU (4.e-3f)
-class myriadLayersTestsMaxPoolingWithReLU_nightly: public PoolingTest<POOLING_MAX, ReLULayerDef>{
+class myriadLayersTestsMaxPoolingWithReLU_smoke: public PoolingTest<POOLING_MAX, ReLULayerDef>{
};
-class myriadLayersTestsAvgPoolingWithReLU_nightly: public PoolingTest<POOLING_AVG, ReLULayerDef>{
+class myriadLayersTestsAvgPoolingWithReLU_smoke: public PoolingTest<POOLING_AVG, ReLULayerDef>{
};
-TEST_P(myriadLayersTestsMaxPoolingWithReLU_nightly, TestsMaxPoolingWithReLU)
+TEST_P(myriadLayersTestsMaxPoolingWithReLU_smoke, TestsMaxPoolingWithReLU)
{
auto p = ::testing::WithParamInterface<std::tuple<InferenceEngine::SizeVector, pooling_layer_params, vpu::LayoutPreference, ReLULayerDef>>::GetParam();
auto extraLayerParams = std::get<3>(p);
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_RELU);
}
-TEST_P(myriadLayersTestsAvgPoolingWithReLU_nightly, TestsAvgPoolingWithReLU)
+TEST_P(myriadLayersTestsAvgPoolingWithReLU_smoke, TestsAvgPoolingWithReLU)
{
auto p = ::testing::WithParamInterface<std::tuple<InferenceEngine::SizeVector, pooling_layer_params, vpu::LayoutPreference, ReLULayerDef>>::GetParam();
auto extraLayerParams = std::get<3>(p);
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_RELU);
}
-class myriadLayerConvolutionWithReLU_nightly: public ConvolutionTest<ReLULayerDef>{
+class myriadLayerConvolutionWithReLU_smoke: public ConvolutionTest<ReLULayerDef>{
};
-TEST_P(myriadLayerConvolutionWithReLU_nightly, Convolution) {
+TEST_P(myriadLayerConvolutionWithReLU_smoke, Convolution) {
auto p = ::testing::WithParamInterface<std::tuple<InferenceEngine::SizeVector, param_size, param_size, param_size, uint32_t, uint32_t, ReLULayerDef>>::GetParam();
auto ReLUParam = std::get<6>(p);
_testNet.addLayer(LayerInitParams("ReLU")
#include "myriad_layers_reorg_test.hpp"
-static std::vector<std::string> s_CustomConfig = {
- "",
-#ifdef VPU_HAS_CUSTOM_KERNELS
- getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"
-#endif
-};
-
-static std::vector<layoutPreference> layoutPreferences = {
- vpu::LayoutPreference::ChannelMajor,
-#ifndef VPU_HAS_CUSTOM_KERNELS
- vpu::LayoutPreference::ChannelMinor
-#endif
-};
-
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsReorg_nightly, ::testing::Combine(
- ::testing::Values<DimsInput>(
- MAKE_STRUCT(tensor_test_params, 1, 64, 26, 26),
- MAKE_STRUCT(tensor_test_params, 1, 192, 6 * 26, 6 * 26),
- MAKE_STRUCT(tensor_test_params, 1, 4, 6, 6)
- ),
- ::testing::Values<ScaleOutput>(2),
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsReorg_smoke, ::testing::Combine(
+ ::testing::ValuesIn(s_ReorgInputs),
::testing::Values<Stride>(2),
- ::testing::ValuesIn(layoutPreferences),
- ::testing::ValuesIn(s_CustomConfig),
- ::testing::Values<IRVersion>(IRVersion::v7, IRVersion::v10)
+ ::testing::Values(vpu::LayoutPreference::ChannelMinor, vpu::LayoutPreference::ChannelMajor),
+ ::testing::Values(IRVersion::v7, IRVersion::v10),
+ ::testing::ValuesIn(s_CustomConfig)
));
#include <gtest/gtest.h>
#include "myriad_layers_tests.hpp"
-using std::tuple;
-using std::get;
-
using namespace InferenceEngine;
-static void reorg_calculate(short *inp, int w, int h, int c, int batch, int stride, float *out)
+static void reorg_calculate(const Blob::Ptr src, Blob::Ptr dst, int stride)
{
- int out_c = c / (stride*stride);
-
- int oc = c * (stride*stride);
- int oh = h / stride;
- int ow = w / stride;
-
- for(int b = 0; b < batch; ++b)
- {
- for(int k = 0; k < c; ++k)
- {
- for(int j = 0; j < h; ++j)
- {
- for(int i = 0; i < w; ++i)
- {
- int in_index = i + w * (j + h * (k + c * b));
-
- int new_z = in_index / (oh*ow);
- int new_y = (in_index %(oh*ow)) / ow;
- int new_x = (in_index %(oh*ow)) % ow;
- int new_index = new_z + new_x * oc + new_y * oc * ow;
-
- int c2 = k % out_c;
- int offset = k / out_c;
- int w2 = i*stride + offset % stride;
- int h2 = j*stride + offset / stride;
- int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b));
-
- out[new_index] = PrecisionUtils::f16tof32(inp[out_index]);
- }
- }
- }
- }
+ ASSERT_NE(src, nullptr);
+ ASSERT_NE(dst, nullptr);
+ const uint16_t *src_data = src->buffer();
+ uint16_t *dst_data = dst->buffer();
+ ASSERT_NE(src_data, nullptr);
+ ASSERT_NE(dst_data, nullptr);
+
+ const auto inputDims = src->getTensorDesc().getDims();
+ const int C = inputDims[1];
+ const int H = inputDims[2];
+ const int W = inputDims[3];
+
+ const auto inputCHW = [&] {
+ auto inputCHW = std::vector<ie_fp16>(C*H*W);
+ if (Layout::NCHW == src->getTensorDesc().getLayout()) {
+ std::copy(src_data, src_data + C*H*W, begin(inputCHW));
+ } else {
+ for (int c = 0; c < C; c++) {
+ for (int h = 0; h < H; h++) {
+ for (int w = 0; w < W; w++) {
+ inputCHW[c*H*W + h*W + w] = src_data[h*W*C + w*C + c];
+ }
+ }
+ }
+ }
+ return inputCHW;
+ }();
+
+ const int C2 = C/(stride*stride);
+ const int H2 = H*stride;
+ const int W2 = W*stride;
+
+ for (int c = 0; c < C; ++c) {
+ for (int h = 0; h < H; ++h) {
+ for (int w = 0; w < W; ++w) {
+ const int offset = c/C2;
+ const int c2 = c - C2*offset;
+ const int h2 = h*stride + offset/stride;
+ const int w2 = w*stride + offset - stride*(offset/stride);
+
+ dst_data[c*H*W + h*W + w] = inputCHW[c2*H2*W2 + h2*W2 + w2];
+ }
+ }
+ }
+
+ dst->getTensorDesc().setLayout(Layout::NCHW);
}
PRETTY_PARAM(Stride, int);
-PRETTY_PARAM(ScaleOutput, int);
PRETTY_PARAM(layoutPreference, vpu::LayoutPreference);
+PRETTY_PARAM(CustomConfig, std::string)
+typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, Stride, layoutPreference, IRVersion, CustomConfig>>
+ myriadLayersTestsReorg_smoke;
-typedef myriadLayerTestBaseWithParam<tuple<DimsInput, ScaleOutput, Stride, layoutPreference, std::string, IRVersion>> myriadLayersTestsReorg_nightly;
-
-TEST_P(myriadLayersTestsReorg_nightly, TestsReorg) {
+TEST_P(myriadLayersTestsReorg_smoke, TestsReorg) {
+ const SizeVector dimsInput = std::get<0>(GetParam());
+ const int stride = std::get<1>(GetParam());
+ const auto layoutPreference = std::get<2>(GetParam());
+ _irVersion = std::get<3>(GetParam());
+ const std::string customConfig = std::get<4>(GetParam());
- // TODO: M2 mode is not working for OpenCL compiler
- if(!get<4>(GetParam()).empty() && !CheckMyriadX()) {
- GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
- }
+ if(!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
+ }
+ _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- tensor_test_params dimsInput = get<0>(GetParam());
+ const auto dimsOutput = SizeVector{dimsInput[0],
+ dimsInput[1] * (stride * stride),
+ dimsInput[2] / stride,
+ dimsInput[3] / stride};
- int scaleOutput = get<1>(GetParam());
- tensor_test_params dimsOutput = {dimsInput.n, dimsInput.c * (scaleOutput * scaleOutput), dimsInput.h / scaleOutput, dimsInput.w / scaleOutput};
+ SetInputTensors({dimsInput});
+ SetOutputTensors({dimsOutput});
- int stride = get<2>(GetParam());
- auto layoutPreference = get<3>(GetParam());
- _irVersion = get<5>(GetParam());
std::map<std::string, std::string> params;
- std::string type = "ReorgYolo";
-
params["stride"] = std::to_string(stride);
- SetInputTensor(dimsInput);
- SetOutputTensor(dimsOutput);
- _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = get<4>(GetParam());
- ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams(type)
- .params(params),
- NetworkInitParams().layoutPreference(layoutPreference)
- .outputPrecision(InferenceEngine::Precision::FP32)));
- /* input data preparation */
- SetInputInOrder();
+
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("ReorgYolo").params(params),
+ NetworkInitParams()
+ .layoutPreference(layoutPreference)
+ .lockLayout(true)));
ASSERT_TRUE(Infer());
- InferenceEngine::SizeVector inputDims = _inputsInfo.begin()->second->getTensorDesc().getDims();
- InferenceEngine::Blob::Ptr inputBlobRef =
- InferenceEngine::make_shared_blob<short>({InferenceEngine::Precision::FP16, inputDims, InferenceEngine::NHWC});
- inputBlobRef->allocate();
- short *inputBlobRefRawData = inputBlobRef->buffer();
-
- int c = inputDims[1];
- int h = inputDims[2];
- int w = inputDims[3];
-
- auto inputBlob =_inputMap[_inputsInfo.begin()->first];
- short * inputBlob_data = inputBlob->buffer();
-
- /* Preliminary repacking */
- for(int k = 0; k < c; k++)
- {
- for(int j = 0; j < h; j++)
- {
- for(int i = 0; i < w; i++)
- {
- int dst_index = i + w * j + w * h * k;
- int src_index = k + c * i + c * w * j;
-
- inputBlobRefRawData[dst_index] = inputBlob_data[src_index];
- }
- }
- }
-
- auto outputBlob =_outputMap[_outputsInfo.begin()->first];
- InferenceEngine::SizeVector outputDims = _outputsInfo.begin()->second->getTensorDesc().getDims();
-
- InferenceEngine::TBlob<float>::Ptr outputBlobRef =
- InferenceEngine::make_shared_blob<float>(TensorDesc(InferenceEngine::Precision::FP32, outputDims, InferenceEngine::NCHW));
- outputBlobRef->allocate();
- float *outputBlobRefRawData = outputBlobRef->buffer();
-
- reorg_calculate(inputBlobRefRawData, w, h, c, 1, stride, outputBlobRefRawData);
-
- compare(outputBlob->buffer(), outputBlobRef->buffer(), outputBlob->size(), 0.0);
+
+ ASSERT_NO_FATAL_FAILURE(reorg_calculate(_inputMap.begin()->second, _refBlob, stride));
+
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, 0);
}
+
+static std::vector<CustomConfig> s_CustomConfig = {
+ {""},
+#ifdef VPU_HAS_CUSTOM_KERNELS
+ getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"
+#endif
+};
+
+static std::vector<SizeVector> s_ReorgInputs = {
+ {1, 64, 26, 26},
+ {1, 192, 6 * 26, 6 * 26},
+ {1, 4, 6, 6}
+};
\ No newline at end of file
#include "myriad_layers_resample_test.hpp"
-INSTANTIATE_TEST_CASE_P(myriad, myriadResampleLayerTests_nightly,
- ::testing::Combine(
- ::testing::Values(CONFIG_VALUE(NO), CONFIG_VALUE(YES)),
- ::testing::ValuesIn(s_ResampleCustomConfig),
- ::testing::ValuesIn(s_ResampleAntialias)));
+// #-31522
+INSTANTIATE_TEST_CASE_P(
+ DISABLED_accuracy, myriadResampleLayerTests_smoke,
+ ::testing::Combine(
+ ::testing::ValuesIn(s_ResampleInput),
+ ::testing::Values<Factor>(2.0f, 0.5f),
+ ::testing::Values<Antialias>(false, true),
+ ::testing::Values<HwOptimization>(false, true),
+ ::testing::ValuesIn(s_CustomConfig))
+);
#include <cmath>
#include "myriad_layers_tests.hpp"
-// #include <iostream>
using namespace InferenceEngine;
#define ERROR_BOUND 1e-3
+PRETTY_PARAM(Factor, float)
+PRETTY_PARAM(Antialias, int)
+PRETTY_PARAM(HwOptimization, bool);
+PRETTY_PARAM(CustomConfig, std::string);
+
+typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, Factor, Antialias, HwOptimization, CustomConfig>>
+ myriadResampleLayerTests_smoke;
+
static inline float triangleCoeff(float x)
{
return (1.0f - fabsf(x));
}
+
void refResample(const Blob::Ptr src, Blob::Ptr dst, int antialias) {
ie_fp16 *src_data = static_cast<ie_fp16*>(src->buffer());
ie_fp16 *output_sequences = static_cast<ie_fp16*>(dst->buffer());
if (IH == OH && IW == OW)
{
- int b = 0;
- for (int c = 0; c < C; c++)
- for (int h = 0; h < IH; h++)
- for (int w = 0; w < IW; w++){
- int dst_index = w + IW * h + IW * IH * c;
- int src_index = dst_index;
- output_sequences[dst_index] = src_data[src_index];
- }
+ std::copy(src_data, src_data + C*IH*IW, output_sequences);
return;
}
}
}
-PRETTY_PARAM(hwAcceleration, std::string);
-PRETTY_PARAM(customConfig, std::string);
-PRETTY_PARAM(Antialias, int)
-
-typedef myriadLayerTestBaseWithParam<std::tuple<std::string, std::string, Antialias>> myriadResampleLayerTests_nightly;
-
-TEST_P(myriadResampleLayerTests_nightly, Resample) {
- std::string model = R"V0G0N(
- <net name="Resample" version="2" batch="1">
- <layers>
- <layer id="0" name="data" precision="FP16" type="Input">
- <output>
- <port id="0">
- <dim>1</dim>
- <dim>128</dim>
- <dim>26</dim>
- <dim>26</dim>
- </port>
- </output>
- </layer>
- <layer id="1" name="detector/yolo-v3/ResizeNearestNeighbor" precision="FP16" type="Resample">
- <data antialias="@TEST@" factor="2.0" type="caffe.ResampleParameter.NEAREST" fx="0.5" fy="0.5"/>
- <input>
- <port id="1">
- <dim>1</dim>
- <dim>128</dim>
- <dim>26</dim>
- <dim>26</dim>
- </port>
- </input>
- <output>
- <port id="2">
- <dim>1</dim>
- <dim>128</dim>
- <dim>52</dim>
- <dim>52</dim>
- </port>
- </output>
- </layer>
- </layers>
- <edges>
- <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
- </edges>
- </net>
- )V0G0N";
-
- SetSeed(DEFAULT_SEED_VALUE + 6);
-
- std::string HWConfigValue = std::get<0>(GetParam());
- std::string customConfig = std::get<1>(GetParam());
- int antialias = std::get<2>(GetParam());
-
- model.replace( model.find("@TEST@"), sizeof("@TEST@") -1, std::to_string(antialias));
- if((customConfig != "") || (antialias != 1)){
- if(!customConfig.empty() && !CheckMyriadX()) {
- GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
- }
- _config[VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION)] = HWConfigValue;
- _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- StatusCode st;
-
- ASSERT_NO_THROW(readNetwork(model));
-
- const auto& network = _cnnNetwork;
+TEST_P(myriadResampleLayerTests_smoke, Resample) {
+ const SizeVector inputDims = std::get<0>(GetParam());
+ const float factor = std::get<1>(GetParam());
+ const bool antialias = std::get<2>(GetParam());
+ const bool hwOptimization = std::get<3>(GetParam());
+ const std::string customConfig = std::get<4>(GetParam());
- _inputsInfo = network.getInputsInfo();
- _inputsInfo["data"]->setPrecision(Precision::FP16);
- _inputsInfo["data"]->setLayout(NCHW);
+ ASSERT_GT(factor, 0);
- _outputsInfo = network.getOutputsInfo();
- _outputsInfo["detector/yolo-v3/ResizeNearestNeighbor"]->setPrecision(Precision::FP16);
+ if (customConfig.empty() && antialias) {
+ GTEST_SKIP() << "Native Resample with antialiasing is not supported";
+ }
- ASSERT_NO_THROW(st = _vpuPluginPtr->LoadNetwork(_exeNetwork, network,
- {{VPU_CONFIG_KEY(CUSTOM_LAYERS), customConfig}, {VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), HWConfigValue}}, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
- ASSERT_NE(_exeNetwork, nullptr) << _resp.msg;
+ if (!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
+ }
- ASSERT_NO_THROW(st = _exeNetwork->CreateInferRequest(_inferRequest, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
+ _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- Blob::Ptr data;
- ASSERT_NO_THROW(st = _inferRequest->GetBlob("data", data, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
+ const auto outputDims = SizeVector{inputDims[0],
+ inputDims[1],
+ (size_t)(inputDims[2] * factor),
+ (size_t)(inputDims[3] * factor)};
- GenRandomData(data);
+ SetInputTensors({inputDims});
+ SetOutputTensors({outputDims});
- ASSERT_NO_THROW(st = _inferRequest->Infer(&_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
+ std::map<std::string, std::string> params;
+ params["antialias"] = std::to_string((int)antialias);
+ params["factor"] = std::to_string(factor);
- Blob::Ptr outputBlob;
- ASSERT_NO_THROW(_inferRequest->GetBlob("detector/yolo-v3/ResizeNearestNeighbor", outputBlob, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("Resample").params(params),
+ NetworkInitParams()
+ .useHWOpt(hwOptimization)
+ .lockLayout(true)));
- _refBlob = make_shared_blob<ie_fp16>(TensorDesc(Precision::FP16, outputBlob->getTensorDesc().getDims(), NCHW));
- _refBlob->allocate();
+ ASSERT_TRUE(Infer());
- refResample(data, _refBlob, antialias);
+ ASSERT_NO_FATAL_FAILURE(refResample(_inputMap.begin()->second, _refBlob, antialias));
- CompareCommonAbsolute(outputBlob, _refBlob, ERROR_BOUND);
- }
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, ERROR_BOUND);
}
-static std::vector<std::string> s_ResampleCustomConfig = {
- "",
+static std::vector<SizeVector> s_ResampleInput = {
+ {1, 128, 26, 26},
+ {1, 64, 52, 52},
+ {1, 23, 14, 14}
+};
+
+static std::vector<CustomConfig> s_CustomConfig = {
+ {""},
#ifdef VPU_HAS_CUSTOM_KERNELS
getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"
#endif
};
-static std::vector<Antialias> s_ResampleAntialias = {
- {0, 1}
-};
#include "ngraph_functions/subgraph_builders.hpp"
#include "myriad_layers_reshape_test.hpp"
-TEST_F(myriadEliminateReshapeTests_nightly, SplitConvConcat) {
+TEST_F(myriadEliminateReshapeTests_smoke, SplitConvConcat) {
ASSERT_NO_THROW(_cnnNetwork = InferenceEngine::CNNNetwork(ngraph::builder::subgraph::makeSplitConvConcat()));
StatusCode st;
EXPECT_EQ(InferenceEngineProfileInfo::NOT_RUN, layerInfo.status);
}
-TEST_F(myriadLayerReshapeFasterRCNN_nightly, Reshape) {
+TEST_F(myriadLayerReshapeFasterRCNN_smoke, Reshape) {
InferenceEngine::SizeVector input_tensor = {1, 14, 14, 24};
InferenceEngine::SizeVector output_tensor = {1, 2352, 2};
std::map<std::string, std::string> layer_params = {
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().useHWOpt( CheckMyriadX())));
}
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerReshape_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerReshape_smoke,
::testing::Combine(
::testing::ValuesIn(s_reshapeInParams),
::testing::ValuesIn(s_reshapeOutParams))
);
-INSTANTIATE_TEST_CASE_P(fc_to_conv_case, myriadLayerReshape_nightly,
+INSTANTIATE_TEST_CASE_P(fc_to_conv_case, myriadLayerReshape_smoke,
::testing::Values(
std::make_tuple(
SizeVector{400, 12544},
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsReshapeBeforeFC_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsReshapeBeforeFC_smoke,
::testing::Values(CONFIG_VALUE(YES), CONFIG_VALUE(NO))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsReshapeFasterRCNN_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsReshapeFasterRCNN_smoke,
::testing::Combine(
::testing::ValuesIn(s_convTensor)
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 1, 1))
using namespace InferenceEngine;
-using myriadEliminateReshapeTests_nightly = myriadLayersTests_nightly;
+using myriadEliminateReshapeTests_smoke = myriadLayersTests_nightly;
-typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, InferenceEngine::SizeVector>> myriadLayerReshape_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, InferenceEngine::SizeVector>> myriadLayerReshape_smoke;
-TEST_P(myriadLayerReshape_nightly, Reshape) {
+TEST_P(myriadLayerReshape_smoke, Reshape) {
auto input_tensor = std::get<0>(GetParam());
auto output_tensor = std::get<1>(GetParam());
}
-typedef myriadLayersTests_nightly myriadLayerReshapeFasterRCNN_nightly;
+typedef myriadLayersTests_nightly myriadLayerReshapeFasterRCNN_smoke;
static std::vector<InferenceEngine::SizeVector> s_reshapeInParams = {
{{1, 4, 2, 16}},
)V0G0N";
-typedef myriadLayerTestBaseWithParam<std::string> myriadLayersTestsReshapeBeforeFC_nightly;
+typedef myriadLayerTestBaseWithParam<std::string> myriadLayersTestsReshapeBeforeFC_smoke;
-TEST_P(myriadLayersTestsReshapeBeforeFC_nightly, OptimizeReshapeIfItIsPlacedBeforeFC) {
+TEST_P(myriadLayersTestsReshapeBeforeFC_smoke, OptimizeReshapeIfItIsPlacedBeforeFC) {
std::string HWConfigValue = GetParam();
if (!CheckMyriadX() && HWConfigValue == CONFIG_VALUE(YES)) {
std::cout << "Disable for non-MyriadX devices" << std::endl;
EXPECT_EQ(InferenceEngineProfileInfo::NOT_RUN, layerInfo.status);
}
-class myriadLayersTestsReshapeFasterRCNN_nightly: public ConvolutionTest<>{
+class myriadLayersTestsReshapeFasterRCNN_smoke: public ConvolutionTest<>{
};
// FIXME: rewrite the test (it doesn't use Convolution) avoid HWC layout for 3D tensor in reference code
-TEST_P(myriadLayersTestsReshapeFasterRCNN_nightly, DISABLED_Convolution) {
+TEST_P(myriadLayersTestsReshapeFasterRCNN_smoke, DISABLED_Convolution) {
std::map<std::string, std::string> permute_params = {
{"order", "0,2,3,1"}
};
#include "myriad_layers_reverse_sequence_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerReverseSequence_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerReverseSequence_smoke,
::testing::Combine(
::testing::Values<ReverseSequence>(
MAKE_STRUCT(reverse_sequence_test_params, {5, 6, 18}, 0, 0)
};
PRETTY_PARAM(ReverseSequence, reverse_sequence_test_params);
-typedef myriadLayerTestBaseWithParam<std::tuple<ReverseSequence, IRVersion>> myriadLayerReverseSequence_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<ReverseSequence, IRVersion>> myriadLayerReverseSequence_smoke;
static int nchw_to_nhwc(InferenceEngine::SizeVector dims, int ind)
{
}
}
-TEST_P(myriadLayerReverseSequence_nightly, ReverseSequence) {
+TEST_P(myriadLayerReverseSequence_smoke, ReverseSequence) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
reverse_sequence_test_params input_dims = std::get<0>(GetParam());
}
}
-class myriadLayersRfcnTests_nightly: public myriadLayersTests_nightly {
+class myriadLayersRfcnTests_smoke: public myriadLayersTests_nightly {
public:
void GenROIs(InferenceEngine::Blob::Ptr rois,
const uint32_t in_width, const uint32_t in_height,
Blob::Ptr outputBlob;
};
-TEST_F(myriadLayersRfcnTests_nightly, ReshapeRfcn)
+TEST_F(myriadLayersRfcnTests_smoke, ReshapeRfcn)
{
StatusCode st = GENERAL_ERROR;
CompareCommonAbsolute(outputBlob, prior_network_output, 0.0f);
}
-TEST_F(myriadLayersRfcnTests_nightly, SoftmaxRfcn)
+TEST_F(myriadLayersRfcnTests_smoke, SoftmaxRfcn)
{
StatusCode st = GENERAL_ERROR;
CompareCommonAbsolute(outputBlob, _refBlob, ERROR_BOUND);
}
-TEST_F(myriadLayersRfcnTests_nightly, GlobalAvgPooling7x7Rfcn)
+TEST_F(myriadLayersRfcnTests_smoke, GlobalAvgPooling7x7Rfcn)
{
StatusCode st = GENERAL_ERROR;
#include "myriad_layers_roi_align_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsROIAlign_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsROIAlign_smoke,
::testing::Combine(
::testing::ValuesIn(s_ROIAlignLayerInput),
::testing::ValuesIn(s_ROIAlignLayerParam),
PRETTY_PARAM(number_rois, uint32_t);
using ROIAlignTestParams = std::tuple<Dims, roi_align_param, number_rois, roi_align_mode>;
-typedef myriadLayerTestBaseWithParam<ROIAlignTestParams> myriadLayersTestsROIAlign_nightly;
+typedef myriadLayerTestBaseWithParam<ROIAlignTestParams> myriadLayersTestsROIAlign_smoke;
const int roi_cols = 4;
return model;
}
-TEST_P(myriadLayersTestsROIAlign_nightly, ROIAlign) {
+TEST_P(myriadLayersTestsROIAlign_smoke, ROIAlign) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
const tensor_test_params dims_layer_in = std::get<0>(GetParam());
#include "myriad_layers_roi_feature_extractor_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsROIFeatureExtractor_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsROIFeatureExtractor_smoke,
::testing::Combine(
::testing::ValuesIn(s_ROIFeatureExtractorLayerInput),
::testing::ValuesIn(s_ROIFeatureExtractorLayerParam),
using ROIFeatureExtractorTestParams = std::tuple<Dims, roi_feature_extractor_param, number_rois>;
-typedef myriadLayerTestBaseWithParam<ROIFeatureExtractorTestParams> myriadLayersTestsROIFeatureExtractor_nightly;
+typedef myriadLayerTestBaseWithParam<ROIFeatureExtractorTestParams> myriadLayersTestsROIFeatureExtractor_smoke;
static void genROIs(InferenceEngine::Blob::Ptr rois,
const roi_feature_extractor_param& params,
}
}
-TEST_P(myriadLayersTestsROIFeatureExtractor_nightly, ROIFeatureExtractor) {
+TEST_P(myriadLayersTestsROIFeatureExtractor_smoke, ROIFeatureExtractor) {
tensor_test_params dims_layer_in = std::get<0>(GetParam());
roi_feature_extractor_param test_params = std::get<1>(GetParam());
const uint32_t num_rois = std::get<2>(GetParam());
#include "myriad_layers_roi_pooling_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsROIPooling_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsROIPooling_smoke,
::testing::Combine(
::testing::ValuesIn(s_ROIPoolingLayerInput),
::testing::ValuesIn(s_ROIPoolingLayerParam),
using ROIPoolingTestParams = std::tuple<Dims, roi_pooling_param, uint32_t, roi_pooling_method, IRVersion>;
-class myriadLayersTestsROIPooling_nightly: public myriadLayerTestBaseWithParam<ROIPoolingTestParams> {
+class myriadLayersTestsROIPooling_smoke: public myriadLayerTestBaseWithParam<ROIPoolingTestParams> {
public:
void genROIs(InferenceEngine::Blob::Ptr rois,
const ROIPoolingParams& params,
}
};
-TEST_P(myriadLayersTestsROIPooling_nightly, ROIPooling) {
+TEST_P(myriadLayersTestsROIPooling_smoke, ROIPooling) {
tensor_test_params dims_layer_in = std::get<0>(GetParam());
ROIPoolingParams test_params = std::get<1>(GetParam());
const uint32_t num_rois = std::get<2>(GetParam());
#include "myriad_layers_scale_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsScale_nightly,
+ accuracy, myriadLayersTestsScale_smoke,
::testing::Combine(
::testing::ValuesIn(s_inputScaleTensors),
::testing::ValuesIn(s_inputBiasScale)));
typedef std::tuple<SizeVector, bool> TestScaleShift;
-class myriadLayersTestsScale_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsScale_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<TestScaleShift> {
};
-TEST_P(myriadLayersTestsScale_nightly, TestsScale)
+TEST_P(myriadLayersTestsScale_smoke, TestsScale)
{
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
{ 16, 512, 56, 56 },
};
-INSTANTIATE_TEST_CASE_P(nd_tensors, myriadLayersScatterElementsUpdateTest_nightly,
+INSTANTIATE_TEST_CASE_P(nd_tensors, myriadLayersScatterElementsUpdateTest_smoke,
Combine(
ValuesIn(dataShapeList_ndTensors),
ValuesIn(dataTypeList)));
-INSTANTIATE_TEST_CASE_P(use_cases, myriadLayersScatterElementsUpdateTest_nightly,
+INSTANTIATE_TEST_CASE_P(use_cases, myriadLayersScatterElementsUpdateTest_smoke,
Combine(
ValuesIn(dataShapeList_useCases),
ValuesIn(dataTypeList)));
using ScatterElementsUpdateTestParams = std::tuple<DataShape,
DataType>;
-class myriadLayersScatterElementsUpdateTest_nightly :
+class myriadLayersScatterElementsUpdateTest_smoke :
public myriadLayerTestBaseWithParam<ScatterElementsUpdateTestParams> {
protected:
std::mt19937 m_gen;
};
-TEST_P(myriadLayersScatterElementsUpdateTest_nightly, accuracy) {
+TEST_P(myriadLayersScatterElementsUpdateTest_smoke, accuracy) {
testScatterElementsUpdate();
}
INSTANTIATE_TEST_CASE_P(
nd_tensors,
- myriadLayersScatterUpdateTest_nightly,
+ myriadLayersScatterUpdateTest_smoke,
Values(
// 1-dimensional `indices`
ScatterUpdateTestParams { { 1000 }, { 100000 } },
INSTANTIATE_TEST_CASE_P(
use_cases,
- myriadLayersScatterUpdateTest_nightly,
+ myriadLayersScatterUpdateTest_smoke,
Values(
// use case from Mask R-CNN: N = 1000, C = 256, HxW = 7x7
ScatterUpdateTestParams { { 32 }, { 1000, 256, 7, 7} },
using ScatterUpdateTestParams = std::tuple<IndicesShape,
InputShape>;
-class myriadLayersScatterUpdateTest_nightly:
+class myriadLayersScatterUpdateTest_smoke:
public myriadLayerTestBaseWithParam<ScatterUpdateTestParams>
{
protected:
}
};
-TEST_P(myriadLayersScatterUpdateTest_nightly, accuracy) {
+TEST_P(myriadLayersScatterUpdateTest_smoke, accuracy) {
testScatterUpdate();
}
#include "myriad_layers_select_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsSelect_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsSelect_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseDims))
}
};
-class myriadTestsSelect_nightly: public SelectTest
+class myriadTestsSelect_smoke: public SelectTest
{
void SetUp() override {
SelectTest::SetUp();
}
};
-TEST_P(myriadTestsSelect_nightly, Select)
+TEST_P(myriadTestsSelect_smoke, Select)
{
InitBody();
}
#include "myriad_layers_sigmoid_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsSigmoid_nightly,
+ accuracy, myriadLayersTestsSigmoid_smoke,
::testing::ValuesIn(s_sigmoidParams));
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithSigmoid_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithSigmoid_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithSigmoid_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithSigmoid_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithSigmoid_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithSigmoid_smoke,
::testing::Combine(
::testing::ValuesIn(g_convolutionTensors)
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 3, 3))
);
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayerFullyConnectedWithSigmoid_nightly,
+ accuracy, myriadLayerFullyConnectedWithSigmoid_smoke,
::testing::Combine(
::testing::ValuesIn(g_fcTestParamsSubset),
::testing::Values(g_dimensionsFC[0]),
using namespace InferenceEngine;
-class myriadLayersTestsSigmoid_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsSigmoid_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<InferenceEngine::SizeVector> {
public:
};
-TEST_P(myriadLayersTestsSigmoid_nightly, TestsSigmoid)
+TEST_P(myriadLayersTestsSigmoid_smoke, TestsSigmoid)
{
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
{{1, 3, 277, 230}}
};
-class myriadLayersTestsMaxPoolingWithSigmoid_nightly: public PoolingTest<POOLING_MAX>{
+class myriadLayersTestsMaxPoolingWithSigmoid_smoke: public PoolingTest<POOLING_MAX>{
};
-class myriadLayersTestsAvgPoolingWithSigmoid_nightly: public PoolingTest<POOLING_AVG>{
+class myriadLayersTestsAvgPoolingWithSigmoid_smoke: public PoolingTest<POOLING_AVG>{
};
-TEST_P(myriadLayersTestsMaxPoolingWithSigmoid_nightly, TestsMaxPoolingWithSigmoid)
+TEST_P(myriadLayersTestsMaxPoolingWithSigmoid_smoke, TestsMaxPoolingWithSigmoid)
{
_testNet.addLayer(LayerInitParams("Sigmoid")
.in({_output_tensor})
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_SIGMOID);
}
-TEST_P(myriadLayersTestsAvgPoolingWithSigmoid_nightly, TestsAvgPoolingWithSigmoid)
+TEST_P(myriadLayersTestsAvgPoolingWithSigmoid_smoke, TestsAvgPoolingWithSigmoid)
{
_testNet.addLayer(LayerInitParams("Sigmoid")
.in({_output_tensor})
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_SIGMOID);
}
-class myriadLayerConvolutionWithSigmoid_nightly: public ConvolutionTest<IRVersion>{
+class myriadLayerConvolutionWithSigmoid_smoke: public ConvolutionTest<IRVersion>{
};
-TEST_P(myriadLayerConvolutionWithSigmoid_nightly, Convolution) {
+TEST_P(myriadLayerConvolutionWithSigmoid_smoke, Convolution) {
_irVersion = std::get<6>(GetParam());
_testNet.addLayer(LayerInitParams("Sigmoid")
.in({_output_tensor})
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), maxerr);
}
-class myriadLayerFullyConnectedWithSigmoid_nightly: public FCTest<>{
+class myriadLayerFullyConnectedWithSigmoid_smoke: public FCTest<>{
};
-TEST_P(myriadLayerFullyConnectedWithSigmoid_nightly, TestsFullyConnected)
+TEST_P(myriadLayerFullyConnectedWithSigmoid_smoke, TestsFullyConnected)
{
_testNet.addLayer(LayerInitParams("Sigmoid")
.in({_output_tensor})
#include "myriad_layers_slice_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSlice_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSlice_smoke,
::testing::Values<SliceTestParams>(
MAKE_STRUCT(SliceParams, {4, 8, 16, 32, 64}, {{4, 8, 16, 10, 64}, {4, 8, 16, 22, 64}}, 3),
MAKE_STRUCT(SliceParams, {4, 8, 16, 32}, {{4, 8, 2, 32}, {4, 8, 14, 32}}, 2))
PRETTY_PARAM(SliceTestParams, SliceParams);
-typedef myriadLayerTestBaseWithParam<SliceTestParams> myriadLayersTestsSlice_nightly;
+typedef myriadLayerTestBaseWithParam<SliceTestParams> myriadLayersTestsSlice_smoke;
-TEST_P(myriadLayersTestsSlice_nightly, Slice) {
+TEST_P(myriadLayersTestsSlice_smoke, Slice) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
const SliceParams testParams = GetParam();
#include "myriad_layers_softmax_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsSoftMax_nightly,
+ accuracy, myriadLayersTestsSoftMax_smoke,
::testing::Combine(
::testing::ValuesIn(s_softMaxTensors)
, ::testing::Values<IRVersion>(IRVersion::v7, IRVersion::v10)
*os << "axis=" << p.axis << ", sizes=" << testing::PrintToString(p.sizes);
}
-using myriadLayersTestsSoftMaxParams_nightly = myriadLayerTestBaseWithParam<std::tuple<SoftmaxAxisSizes, IRVersion>>;
+using myriadLayersTestsSoftMaxParams_smoke = myriadLayerTestBaseWithParam<std::tuple<SoftmaxAxisSizes, IRVersion>>;
-class myriadLayersTestsSoftMax_nightly: public myriadLayersTestsSoftMaxParams_nightly {
+class myriadLayersTestsSoftMax_smoke: public myriadLayersTestsSoftMaxParams_smoke {
protected:
SoftmaxAxisSizes _testingInput;
void SetUp() override {
- myriadLayersTestsSoftMaxParams_nightly::SetUp();
+ myriadLayersTestsSoftMaxParams_smoke::SetUp();
_testingInput = std::get<0>(GetParam());
_irVersion = std::get<1>(GetParam());
}
};
-TEST_P(myriadLayersTestsSoftMax_nightly, TestsSoftMax)
+TEST_P(myriadLayersTestsSoftMax_smoke, TestsSoftMax)
{
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
SetInputTensors({_testingInput.sizes});
#include "myriad_layers_split_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSplit_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSplit_smoke,
::testing::Values<SplitTestParams>(
MAKE_STRUCT(SplitParams, {4, 8, 16, 32, 64}, 2, 6),
MAKE_STRUCT(SplitParams, {4, 8, 16, 32}, 2, 6),
PRETTY_PARAM(SplitTestParams, SplitParams);
-typedef myriadLayerTestBaseWithParam<SplitTestParams> myriadLayersTestsSplit_nightly;
+typedef myriadLayerTestBaseWithParam<SplitTestParams> myriadLayersTestsSplit_smoke;
-TEST_P(myriadLayersTestsSplit_nightly, Split) {
+TEST_P(myriadLayersTestsSplit_smoke, Split) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
const SplitParams testParams = GetParam();
#include "myriad_layers_squeeze_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC1,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC1_smoke,
::testing::Combine(
::testing::ValuesIn(s_squeezeTensorsTC1),
::testing::ValuesIn(s_squeezeIndicesTC1),
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC2,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC2_smoke,
::testing::Combine(
::testing::ValuesIn(s_squeezeTensorsTC2),
::testing::ValuesIn(s_squeezeIndicesTC2),
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC3,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC3_smoke,
::testing::Combine(
::testing::ValuesIn(s_squeezeTensorsTC3),
::testing::ValuesIn(s_squeezeIndicesTC3),
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC4,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC4_smoke,
::testing::Combine(
::testing::ValuesIn(s_squeezeTensorsTC4),
::testing::ValuesIn(s_squeezeIndicesTC4),
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC5,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC5_smoke,
::testing::Combine(
::testing::ValuesIn(s_squeezeTensorsTC5),
::testing::ValuesIn(s_squeezeIndicesTC5),
}
};
-class myriadLayersTestsSqueezeTC1 : public myriadLayersTestsSqueezeBase
+class myriadLayersTestsSqueezeTC1_smoke : public myriadLayersTestsSqueezeBase
{
};
-class myriadLayersTestsSqueezeTC2 : public myriadLayersTestsSqueezeBase
+class myriadLayersTestsSqueezeTC2_smoke : public myriadLayersTestsSqueezeBase
{
};
-class myriadLayersTestsSqueezeTC3 : public myriadLayersTestsSqueezeBase
+class myriadLayersTestsSqueezeTC3_smoke : public myriadLayersTestsSqueezeBase
{
};
-class myriadLayersTestsSqueezeTC4 : public myriadLayersTestsSqueezeBase
+class myriadLayersTestsSqueezeTC4_smoke : public myriadLayersTestsSqueezeBase
{
};
-class myriadLayersTestsSqueezeTC5 : public myriadLayersTestsSqueezeBase
+class myriadLayersTestsSqueezeTC5_smoke : public myriadLayersTestsSqueezeBase
{
};
-TEST_P(myriadLayersTestsSqueezeTC1, Squeeze) {
+TEST_P(myriadLayersTestsSqueezeTC1_smoke, Squeeze) {
DISABLE_IF(!CheckMyriadX());
InitBody();
}
-TEST_P(myriadLayersTestsSqueezeTC2, Squeeze) {
+TEST_P(myriadLayersTestsSqueezeTC2_smoke, Squeeze) {
DISABLE_IF(!CheckMyriadX());
InitBody();
}
-TEST_P(myriadLayersTestsSqueezeTC3, Squeeze) {
+TEST_P(myriadLayersTestsSqueezeTC3_smoke, Squeeze) {
DISABLE_IF(!CheckMyriadX());
InitBody();
}
-TEST_P(myriadLayersTestsSqueezeTC4, Squeeze) {
+TEST_P(myriadLayersTestsSqueezeTC4_smoke, Squeeze) {
DISABLE_IF(!CheckMyriadX());
InitBody();
}
-TEST_P(myriadLayersTestsSqueezeTC5, Squeeze) {
+TEST_P(myriadLayersTestsSqueezeTC5_smoke, Squeeze) {
DISABLE_IF(!CheckMyriadX());
InitBody();
}
#include "myriad_layers_strided_slice_test.h"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsStridedSlice_nightly,
+ accuracy, myriadLayersTestsStridedSlice_smoke,
::testing::ValuesIn(s_stridedSliceParams));
InferenceEngine::SizeVector out_shape;
};
-class myriadLayersTestsStridedSlice_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsStridedSlice_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<strided_slice_test_param> {
public:
std::string model_t = R"V0G0N(
}
};
-TEST_P(myriadLayersTestsStridedSlice_nightly, TestsStridedSlice) {
+TEST_P(myriadLayersTestsStridedSlice_smoke, TestsStridedSlice) {
auto p = ::testing::WithParamInterface<strided_slice_test_param>::GetParam();
std::string model = getModel(p);
#include "myriad_layers_tanh_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsTanh_nightly,
+ accuracy, myriadLayersTestsTanh_smoke,
::testing::ValuesIn(s_tanhParams));
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithTanH_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithTanH_smoke,
::testing::Combine(
::testing::ValuesIn(g_convolutionTensors)
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithTanh_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithTanh_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithTanh_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithTanh_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
);
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayerFullyConnectedWithTanH_nightly,
+ accuracy, myriadLayerFullyConnectedWithTanH_smoke,
::testing::Combine(
::testing::ValuesIn(g_fcTestParamsSubset),
::testing::Values(g_dimensionsFC[0]),
#define ERROR_BOUND_WITH_TANH (1.0e-3f)
using namespace InferenceEngine;
-class myriadLayersTestsTanh_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsTanh_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<SizeVector> {
};
-TEST_P(myriadLayersTestsTanh_nightly, TestsTanh)
+TEST_P(myriadLayersTestsTanh_smoke, TestsTanh)
{
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
auto p = ::testing::WithParamInterface<SizeVector>::GetParam();
{{1, 16, 8, 8}, 8, 0.065f}
};
-class myriadLayerConvolutionWithTanH_nightly: public ConvolutionTest<IRVersion>{
+class myriadLayerConvolutionWithTanH_smoke: public ConvolutionTest<IRVersion>{
};
-TEST_P(myriadLayerConvolutionWithTanH_nightly, Convolution) {
+TEST_P(myriadLayerConvolutionWithTanH_smoke, Convolution) {
auto param = GetParam();
_irVersion = std::get<6>(param);
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), maxerr);
}
-class myriadLayersTestsMaxPoolingWithTanh_nightly: public PoolingTest<POOLING_MAX>{
+class myriadLayersTestsMaxPoolingWithTanh_smoke: public PoolingTest<POOLING_MAX>{
};
-class myriadLayersTestsAvgPoolingWithTanh_nightly: public PoolingTest<POOLING_AVG>{
+class myriadLayersTestsAvgPoolingWithTanh_smoke: public PoolingTest<POOLING_AVG>{
};
-TEST_P(myriadLayersTestsMaxPoolingWithTanh_nightly, TestsMaxPoolingWithTanh)
+TEST_P(myriadLayersTestsMaxPoolingWithTanh_smoke, TestsMaxPoolingWithTanh)
{
_testNet.addLayer(LayerInitParams("TanH")
.in({_output_tensor})
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_TANH);
}
-TEST_P(myriadLayersTestsAvgPoolingWithTanh_nightly, TestsAvgPoolingWithTanh)
+TEST_P(myriadLayersTestsAvgPoolingWithTanh_smoke, TestsAvgPoolingWithTanh)
{
_testNet.addLayer(LayerInitParams("TanH")
.in({_output_tensor})
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_TANH);
}
-class myriadLayerFullyConnectedWithTanH_nightly: public FCTest<>{
+class myriadLayerFullyConnectedWithTanH_smoke: public FCTest<>{
};
-TEST_P(myriadLayerFullyConnectedWithTanH_nightly, TestsFullyConnected)
+TEST_P(myriadLayerFullyConnectedWithTanH_smoke, TestsFullyConnected)
{
_testNet.addLayer(LayerInitParams("TanH")
.in({_output_tensor})
+++ /dev/null
-// Copyright (C) 2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "vpu_layers_tests.hpp"
-#include "vpu_case_params.hpp"
-#include "common/include/vpu/utils/error.hpp"
-
-#include "single_layer_common.hpp"
-
-#include "gtest/gtest.h"
-
-#include <string>
-#include <ngraph_functions/subgraph_builders.hpp>
-#include <common_test_utils/test_common.hpp>
-#include <functional_test_utils/blob_utils.hpp>
-#include <vpu_case_common.hpp>
-
-namespace {
-
-class MyriadLayersTestsTensorIterator : public CommonTestUtils::TestsCommon {
-public:
- void SetUp() override {
- fn_ptr = ngraph::builder::subgraph::makeTIwithLSTMcell();
- }
-protected:
- std::shared_ptr<ngraph::Function> fn_ptr;
-};
-
-// TODO: Issue: 29485
-TEST_F(MyriadLayersTestsTensorIterator, CompareNativeVersionWithUnrolledLoop) {
- DISABLE_IF(!CheckMyriadX () && !CheckMA2085());
- CNNNetwork network(fn_ptr);
- network.getInputsInfo().begin()->second->setPrecision(Precision::FP16);
-
-
- auto ie = PluginCache::get().ie();
-
- ExecutableNetwork exeNetworkWithConfig = ie->LoadNetwork(network, CommonTestUtils::DEVICE_MYRIAD,
- {{VPU_CONFIG_KEY(FORCE_PURE_TENSOR_ITERATOR), CONFIG_VALUE(NO)},
- {VPU_CONFIG_KEY(ENABLE_TENSOR_ITERATOR_UNROLLING), CONFIG_VALUE(YES)}});
- InferRequest inferRequestWithConfig = exeNetworkWithConfig.CreateInferRequest();
- auto blobWithConfig = FuncTestUtils::createAndFillBlob(network.getInputsInfo().begin()->second->getTensorDesc());
- inferRequestWithConfig.SetBlob(network.getInputsInfo().begin()->first, blobWithConfig);
- inferRequestWithConfig.Infer();
- auto* outRawDataWithConfig = inferRequestWithConfig.GetBlob(network.getOutputsInfo().begin()->first)->cbuffer().as<float*>();
-
- ExecutableNetwork exeNetworkWithoutConfig = ie->LoadNetwork(network, CommonTestUtils::DEVICE_MYRIAD,
- {{VPU_CONFIG_KEY(FORCE_PURE_TENSOR_ITERATOR), CONFIG_VALUE(YES)},
- {VPU_CONFIG_KEY(ENABLE_TENSOR_ITERATOR_UNROLLING), CONFIG_VALUE(NO)}});
- InferRequest inferRequestWithoutConfig = exeNetworkWithoutConfig.CreateInferRequest();
- auto blobWithoutConfig = FuncTestUtils::createAndFillBlob(network.getInputsInfo().begin()->second->getTensorDesc());
- inferRequestWithoutConfig.SetBlob(network.getInputsInfo().begin()->first, blobWithoutConfig);
- inferRequestWithoutConfig.Infer();
- auto* outRawDataWithoutConfig = inferRequestWithoutConfig.GetBlob(network.getOutputsInfo().begin()->first)->cbuffer().as<float*>();
-
- auto thr = FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP16);
- size_t outElementsCount = std::accumulate(begin(fn_ptr->get_output_shape(0)), end(fn_ptr->get_output_shape(0)), 1,
- std::multiplies<size_t>());
-
- FuncTestUtils::compareRawBuffers(outRawDataWithoutConfig, outRawDataWithConfig, outElementsCount,
- outElementsCount,
- thr);
-
-}
-}
#include "myriad_layers_tile_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracyAdd, myriadLayerTestTile_nightly,
+INSTANTIATE_TEST_CASE_P(accuracyAdd, myriadLayerTestTile_smoke,
::testing::Combine(
::testing::Values<test_params>(
MAKE_STRUCT(tile_test::nd_tensor_test_params, {4, 5, 6}, 0)
, ::testing::Values<tiles>(2, 3, 5)
));
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerTestTile_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerTestTile_smoke,
::testing::Combine(
::testing::Values<test_params>(
MAKE_STRUCT(tile_test::nd_tensor_test_params, {4, 5, 6}, 1)
}
}
-typedef myriadLayerTestBaseWithParam<tuple<test_params, tiles>> myriadLayerTestTile_nightly;
+typedef myriadLayerTestBaseWithParam<tuple<test_params, tiles>> myriadLayerTestTile_smoke;
-TEST_P(myriadLayerTestTile_nightly, Tile) {
+TEST_P(myriadLayerTestTile_smoke, Tile) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
tile_test::nd_tensor_test_params input_dims = get<0>(GetParam());
// "none", // currently is not supported by firmware
};
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsTopK_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsTopK_smoke,
::testing::Combine(
::testing::ValuesIn(s_geometries_list),
::testing::ValuesIn(s_modes_list),
class TopKTest: public myriadLayerTestBaseWithParam<TopKTestParams>
{
protected:
- std::set<std::string> getExecutedStagesTypes() const {
- std::set<std::string> result;
- std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perfMap;
- _inferRequest->GetPerformanceCounts(perfMap, nullptr);
-
- for (const auto& perf : perfMap)
- result.emplace(perf.second.exec_type);
-
- return result;
- }
-
void testTopK(const IRVersion irVersion, const bool outputValues, const bool outputIndices) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
_config[CONFIG_KEY(PERF_COUNT)] = CONFIG_VALUE(YES);
ASSERT_NO_THROW(st = _inferRequest->Infer(&_resp));
ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
- const auto executedTypes = getExecutedStagesTypes();
-
- // This logic must be synchronized with TopKStage class.
- const bool useArgMaxOptimization = (!outputValues || !outputIndices)
- && mode == "max"
- && ((sort == "value" && outputValues) || (sort == "index" && outputIndices));
-
- ASSERT_EQ(executedTypes.count("ArgMax"), useArgMaxOptimization);
- ASSERT_EQ(executedTypes.count("TopK"), !useArgMaxOptimization);
-
Blob::Ptr outputValuesBlob, outputIndicesBlob;
if (outputValues) {
ASSERT_NO_THROW(st = _inferRequest->GetBlob("topk.0", outputValuesBlob, &_resp));
}
};
-class myriadTestsTopK_nightly: public TopKTest
+class myriadTestsTopK_smoke: public TopKTest
{
};
-TEST_P(myriadTestsTopK_nightly, TopKv7)
+TEST_P(myriadTestsTopK_smoke, TopKv7)
{
testTopK(IRVersion::v7, true, true);
}
-TEST_P(myriadTestsTopK_nightly, TopKv10_All)
+TEST_P(myriadTestsTopK_smoke, TopKv10_All)
{
testTopK(IRVersion::v10, true, true);
}
-TEST_P(myriadTestsTopK_nightly, TopKv10_ArgMaxValues)
+TEST_P(myriadTestsTopK_smoke, TopKv10_ArgMaxValues)
{
testTopK(IRVersion::v10, true, false);
}
-TEST_P(myriadTestsTopK_nightly, TopKv10_ArgMaxIndices)
+TEST_P(myriadTestsTopK_smoke, TopKv10_ArgMaxIndices)
{
testTopK(IRVersion::v10, false, true);
}
#include "myriad_layers_unsqueeze_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsUnsqueeze,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsUnsqueeze_smoke,
::testing::Combine(
::testing::ValuesIn(s_squeezeTensors),
::testing::ValuesIn(s_squeezeIndices)
using namespace InferenceEngine;
typedef std::vector<int32_t> IndicesVector;
-typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, IndicesVector>> myriadLayersTestsUnsqueeze;
+typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, IndicesVector>> myriadLayersTestsUnsqueeze_smoke;
static void ref_unsqueeze(const InferenceEngine::Blob::Ptr src,
InferenceEngine::Blob::Ptr dst) {
ie_memcpy(dst_data, dst_size * sizeof(ie_fp16), src_data, src_size * sizeof(ie_fp16));
}
-TEST_P (myriadLayersTestsUnsqueeze, Unsqueeze){
+TEST_P(myriadLayersTestsUnsqueeze_smoke, Unsqueeze){
auto input = std::get<0>(GetParam());
auto indices = std::get<1>(GetParam());
#include "pool_ref.hpp"
#include "ie_memcpy.h"
#include <single_layer_common.hpp>
+#include <vpu/model/data_desc.hpp>
#include "common_test_utils/common_layers_params.hpp"
#include "vpu/utils/error.hpp"
for (int i = 1; i < ndims; ++i)
offset = offset * dims[i] + indices[i];
}
-
return data[offset];
}
}
}
-template<>
-void ref_reduce<ie_fp16>(const Blob::Ptr& in,
- const Blob::Ptr& axes,
- Blob::Ptr& out,
- int keep_dims,
- IReduceKernel<ie_fp16>* op)
+template void ref_reduce(const Blob::Ptr& in,
+ const Blob::Ptr& axes,
+ Blob::Ptr& out,
+ int keep_dims,
+ vpu::LayoutPreference layoutPreference,
+ IReduceKernel<ie_fp16>* op);
+
+template void ref_reduce(const Blob::Ptr& in,
+ const Blob::Ptr& axes,
+ Blob::Ptr& out,
+ int keep_dims,
+ vpu::LayoutPreference layoutPreference,
+ IReduceKernel<int32_t>* op);
+
+template<typename DataType>
+void ref_reduce(const Blob::Ptr& in,
+ const Blob::Ptr& axes,
+ Blob::Ptr& out,
+ int keep_dims,
+ vpu::LayoutPreference layoutPreference,
+ IReduceKernel<DataType>* op)
{
ASSERT_NE(in, nullptr);
ASSERT_NE(axes, nullptr);
ASSERT_NE(out, nullptr);
- const int16_t* inData = in->cbuffer().as<const int16_t*>();
- int16_t* outData = out->buffer().as<int16_t*>();
-
- ASSERT_NE(inData, nullptr);
- ASSERT_NE(outData, nullptr);
-
const auto axesDims = axes->getTensorDesc().getDims();
ASSERT_EQ(axesDims.size(), 1);
const auto axesSize = axesDims[0];
- const int32_t* axesData = axes->cbuffer().as<const int32_t*>();
- ASSERT_TRUE(!(axesSize > 0) || (axesData != nullptr));
+ int32_t* axesData = axes->cbuffer().as<int32_t*>();
- reduceImpl::refReduce(in, out, axesSize, axesData, keep_dims, op);
-}
+ if (layoutPreference == vpu::LayoutPreference::ChannelMinor) {
+ auto inDims = in->getTensorDesc().getDims();
+ const auto ndims = inDims.size();
+ auto newDims = inDims;
-template<>
-void ref_reduce<int32_t>(const Blob::Ptr& in,
- const Blob::Ptr& axes,
- Blob::Ptr& out,
- int keep_dims,
- IReduceKernel<int32_t>* op)
-{
- ASSERT_NE(in, nullptr);
- ASSERT_NE(axes, nullptr);
- ASSERT_NE(out, nullptr);
+ const auto dimsOrder = vpu::DimsOrder::fromLayout(in->getTensorDesc().getLayout());
+ const auto defPerm = vpu::DimsOrder::fromNumDims(ndims).toPermutation();
- const int32_t* inData = in->cbuffer().as<const int32_t*>();
- int32_t* outData = out->buffer().as<int32_t*>();
+ for (int i = 0; i < ndims; ++i) {
+ auto newInd = ndims - 1 - dimsOrder.dimInd(defPerm[ndims - i - 1]);
+ newDims[newInd] = inDims[i];
+ }
- ASSERT_NE(inData, nullptr);
- ASSERT_NE(outData, nullptr);
+ in->getTensorDesc().setDims(newDims);
- const auto axesDims = axes->getTensorDesc().getDims();
- ASSERT_EQ(axesDims.size(), 1);
+ for (int i = 0; i < axesSize; ++i) {
+ axesData[i] = ndims - 1 - dimsOrder.dimInd(defPerm[ndims - axesData[i] - 1]);
+ newDims[axesData[i]] = keep_dims ? 1 : 0;
+ }
+
+ if (!keep_dims) {
+ newDims.erase(std::remove(newDims.begin(), newDims.end(), 0), newDims.end());
+ }
+
+ out->getTensorDesc().setDims(newDims);
+ }
- const auto axesSize = axesDims[0];
- const int32_t* axesData = axes->cbuffer().as<const int32_t*>();
ASSERT_TRUE(!(axesSize > 0) || (axesData != nullptr));
reduceImpl::refReduce(in, out, axesSize, axesData, keep_dims, op);
#include <iomanip> // std::setw
+#include <vpu/utils/ie_helpers.hpp>
+#include <graph_transformer/include/vpu/model/data_desc.hpp>
+
typedef std::map<std::string, std::string> ParamsStruct;
typedef float (*eltwise_kernel)(float a, float b, float c);
const InferenceEngine::Blob::Ptr& axes,
InferenceEngine::Blob::Ptr& dst,
int keep_dims,
+ vpu::LayoutPreference layoutPreference,
IReduceKernel<DataType>* op);
void ref_topk(const InferenceEngine::Blob::Ptr& srcValues,
}
}
+bool vpuLayersTests::wasCustomLayerInferred() const {
+ auto perfMap = std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>{};
+ _inferRequest->GetPerformanceCounts(perfMap, nullptr);
+ const auto isCustomLayer = [&](const std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>& info) {
+ return !strcmp(info.second.exec_type, "Custom");
+ };
+ return std::any_of(begin(perfMap), end(perfMap), isCustomLayer);
+}
+
namespace {
template<class TensorDescriptor>
return false;
const auto st = _inferRequest->Infer(&_resp);
EXPECT_EQ(InferenceEngine::StatusCode::OK, st) << _resp.msg;
- //dumpPerformance();
+// dumpPerformance();
+ if (!_config[VPU_CONFIG_KEY(CUSTOM_LAYERS)].empty()) {
+ EXPECT_TRUE(wasCustomLayerInferred())
+ << "CustomBindings.xml has been provided but Custom layer was not inferred";
+ }
return true;
}
void TearDown() override;
bool CheckMyriadX();
void dumpPerformance();
+ bool wasCustomLayerInferred() const;
// For historical reasons, gen-blob functions use to 'hack' blob layout:
// replace NCHW with NHWC even if you explicitly setup layout preference
endfunction()
add_helpers(${TARGET_NAME})
+
target_link_libraries(${TARGET_NAME} PUBLIC commonTestUtils)
add_helpers(${TARGET_NAME}_s USE_STATIC_IE)
+
target_link_libraries(${TARGET_NAME}_s PUBLIC commonTestUtils_s)
+
+if (ENABLE_DATA)
+ add_dependencies(${TARGET_NAME} data)
+ add_dependencies(${TARGET_NAME}_s data)
+endif()
#pragma once
#include <ie_blob.h>
+#include <ie_core.hpp>
#include <ie_layers_property.hpp>
#include <precision_utils.h>
#include <common_test_utils/xml_net_builder/xml_net_builder.hpp>
};
template<int Version = 3>
-inline InferenceEngine::details::CNNNetworkImplPtr
-buildSingleLayerNetworkCommon(InferenceEngine::details::IFormatParser *parser,
- const std::string &layerType,
+inline InferenceEngine::CNNNetwork
+buildSingleLayerNetworkCommon(const std::string &layerType,
const CommonTestUtils::InOutShapes &inOutShapes,
std::map<std::string, std::string> *params,
const std::string &layerDataName = "data",
size_t weightsSize = 0,
size_t biasesSize = 0,
const InferenceEngine::TBlob<uint8_t>::Ptr &weights = nullptr) {
- IE_ASSERT(parser);
- testing::XMLHelper xmlHelper(parser);
std::string precisionStr = precision.name();
auto netBuilder = CommonTestUtils::XmlNetBuilder<Version>::buildNetworkWithOneInput("Mock", inOutShapes.inDims[0],
precisionStr);
} else {
testContent = netBuilder.finish();
}
- xmlHelper.loadContent(testContent);
- auto result = xmlHelper.parseWithReturningNetwork();
- if (weights) xmlHelper.setWeights(weights);
- return result;
+
+ InferenceEngine::Core ie;
+ return ie.ReadNetwork(testContent, weights);
}
void GenRandomDataCommon(InferenceEngine::Blob::Ptr blob);
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-/**
-* \brief TODO: short file description
-* \file file_utils.h
-*/
-#pragma once
-
-#include <string>
-#include <gtest/gtest.h>
-
-namespace {
- bool strContains(const std::string & str, const std::string & substr) {
- return str.find(substr) != std::string::npos;
- }
- bool strDoesnotContain(const std::string & str, const std::string & substr) {
- (void)strDoesnotContain; // to overcome unused warning
- return !strContains(str, substr);
- }
-}
-
-#define ASSERT_STR_CONTAINS(str, substr) ASSERT_PRED2(&strContains, str, substr)
-#define ASSERT_STR_DOES_NOT_CONTAIN(str, substr) ASSERT_PRED2 (&strDoesnotContain, str, substr)
-#define EXPECT_STR_CONTAINS(str, substr) EXPECT_PRED2(&strContains, str, substr)
\ No newline at end of file
}
void MockPlugin::GetVersion(const Version *&versionInfo) noexcept {
+ versionInfo = &version;
}
StatusCode MockPlugin::AddExtension(IExtensionPtr extension, InferenceEngine::ResponseDesc *resp) noexcept {
IE_SUPPRESS_DEPRECATED_START
class MockPlugin : public InferenceEngine::IInferencePlugin {
InferenceEngine::IInferencePlugin * _target = nullptr;
+ InferenceEngine::Version version;
public:
explicit MockPlugin(InferenceEngine::IInferencePlugin*target);
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold")
endif()
-add_test(NAME ${TARGET_NAME}
- COMMAND ${TARGET_NAME})
+add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME})
+set_property(TEST ${TARGET_NAME} PROPERTY LABELS IE)
add_dependencies(${TARGET_NAME} mock_engine)
//
#include <gtest/gtest.h>
-#include <parsers.h>
#include <ie_cnn_net_reader_impl.h>
#include <gmock/gmock-more-actions.h>
#include "cnn_network_impl.hpp"
ASSERT_EQ(hwOutput->parentData(), outputCopy1);
ASSERT_EQ(hwOutput->numConsumers(), 2);
- ASSERT_TRUE(contains(hwOutput->consumers(), [](const Stage& stage) { return stage->type() == StageType::Concat; }));
+ ASSERT_TRUE(contains(hwOutput->consumers(), [](const Stage& stage) { return stage->type() == StageType::StubConcat; }));
ASSERT_TRUE(contains(hwOutput->consumers(), [](const Stage& stage) { return stage->type() == StageType::Copy; }));
}
#include <gtest/gtest.h>
#include <graph_tools.hpp>
-#include "test_assertions.hpp"
+#include <common_test_utils/test_assertions.hpp>
#include <unordered_set>
#include <gmock/gmock-generated-function-mockers.h>
#include <gmock/gmock-generated-matchers.h>
using namespace InferenceEngine;
-class CNNNetworkTests : public ::testing::Test {
-protected:
- virtual void TearDown() {
- }
-
- virtual void SetUp() {
- }
-
-public:
-
-};
+using CNNNetworkTests = ::testing::Test;
TEST_F(CNNNetworkTests, throwsOnInitWithNull) {
std::shared_ptr<ICNNNetwork> nlptr = nullptr;
ASSERT_THROW(CNNNetwork network(nlptr), InferenceEngine::details::InferenceEngineException);
}
+
+TEST_F(CNNNetworkTests, throwsOnInitWithNullNgraph) {
+ std::shared_ptr<const ngraph::Function> nlptr = nullptr;
+ ASSERT_THROW(CNNNetwork network(nlptr), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnUninitializedGetPrecision) {
+ CNNNetwork network;
+ ASSERT_THROW(network.getPrecision(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnUninitializedGetOutputsInfo) {
+ CNNNetwork network;
+ ASSERT_THROW(network.getOutputsInfo(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnUninitializedGetInputsInfo) {
+ CNNNetwork network;
+ ASSERT_THROW(network.getInputsInfo(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnUninitializedLayerCount) {
+ CNNNetwork network;
+ ASSERT_THROW(network.layerCount(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnUninitializedGetName) {
+ CNNNetwork network;
+ ASSERT_THROW(network.getName(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnUninitializedCastToICNNNetwork) {
+ CNNNetwork network;
+ ASSERT_THROW(auto & net = static_cast<ICNNNetwork&>(network), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnConstUninitializedCastToICNNNetwork) {
+ const CNNNetwork network;
+ ASSERT_THROW(const auto & net = static_cast<const ICNNNetwork&>(network), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnUninitializedGetFunction) {
+ CNNNetwork network;
+ ASSERT_THROW(network.getFunction(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnConstUninitializedGetFunction) {
+ const CNNNetwork network;
+ ASSERT_THROW(network.getFunction(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnConstUninitializedBegin) {
+ CNNNetwork network;
+ ASSERT_THROW(network.getFunction(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnConstUninitializedGetInputShapes) {
+ CNNNetwork network;
+ ASSERT_THROW(network.getInputShapes(), InferenceEngine::details::InferenceEngineException);
+}
auto exceptionMessage = getExceptionMessage([&]() { InferRequest->SetInput(blobMap); });
ASSERT_EQ(_inputDataIsEmptyError, exceptionMessage.substr(0, _inputDataIsEmptyError.size()));
}
+
+using InferRequestCPPTests = ::testing::Test;
+
+TEST_F(InferRequestCPPTests, throwsOnInitWithNull) {
+ IInferRequest::Ptr nlptr = nullptr;
+ ASSERT_THROW(InferRequest req(nlptr), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedSetBlob) {
+ InferRequest req;
+ ASSERT_THROW(req.SetBlob({}, {}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedGetBlob) {
+ InferRequest req;
+ ASSERT_THROW(req.GetBlob({}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedSetBlobPreproc) {
+ InferRequest req;
+ ASSERT_THROW(req.SetBlob({}, {}, {}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedGetPreProcess) {
+ InferRequest req;
+ ASSERT_THROW(req.GetPreProcess({}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedInfer) {
+ InferRequest req;
+ ASSERT_THROW(req.Infer(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedGetPerformanceCounts) {
+ InferRequest req;
+ ASSERT_THROW(req.GetPerformanceCounts(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedSetInput) {
+ InferRequest req;
+ ASSERT_THROW(req.SetInput({{}}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedSetOutput) {
+ InferRequest req;
+ ASSERT_THROW(req.SetOutput({{}}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedSetBatch) {
+ InferRequest req;
+ ASSERT_THROW(req.SetBatch({}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedStartAsync) {
+ InferRequest req;
+ ASSERT_THROW(req.StartAsync(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedWait) {
+ InferRequest req;
+ ASSERT_THROW(req.Wait({}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedSetCompletionCallback) {
+ InferRequest req;
+ std::function<void(InferRequest, StatusCode)> f;
+ ASSERT_THROW(req.SetCompletionCallback(f), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedCast) {
+ InferRequest req;
+ ASSERT_THROW(auto & ireq = static_cast<IInferRequest::Ptr&>(req), InferenceEngine::details::InferenceEngineException);
+}
EXPECT_CALL(*mock_impl.get(), Export(_)).WillOnce(Throw(5));
ASSERT_EQ(UNEXPECTED, exeNetwork->Export({}, nullptr));
}
+
+using ExecutableNetworkTests = ::testing::Test;
+
+TEST_F(ExecutableNetworkTests, throwsOnInitWithNull) {
+ std::shared_ptr<IExecutableNetwork> nlptr = nullptr;
+ ASSERT_THROW(ExecutableNetwork exec(nlptr), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedGetOutputsInfo) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.GetOutputsInfo(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedGetInputsInfo) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.GetInputsInfo(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedExport) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.Export(std::string()), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedExportStream) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.Export(std::cout), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, nothrowsOnUninitializedCast) {
+ ExecutableNetwork exec;
+ ASSERT_NO_THROW(auto & enet = static_cast<IExecutableNetwork::Ptr&>(exec));
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedGetExecGraphInfo) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.GetExecGraphInfo(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedQueryState) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.QueryState(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedSetConfig) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.SetConfig({{}}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedGetConfig) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.GetConfig({}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedGetMetric) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.GetMetric({}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedGetContext) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.GetContext(), InferenceEngine::details::InferenceEngineException);
+}
mockNotEmptyNet.getOutputsInfo(outputsInfo);
mockInferRequestInternal = make_shared<MockInferRequestInternal>(inputsInfo, outputsInfo);
mockExeNetworkTS = make_shared<MockExecutableNetworkThreadSafe>();
- EXPECT_CALL(*mock_plugin_impl.get(), LoadExeNetworkImpl(_, _, _)).WillOnce(Return(mockExeNetworkTS));
+ EXPECT_CALL(*mock_plugin_impl.get(), LoadExeNetworkImpl(_, _)).WillOnce(Return(mockExeNetworkTS));
EXPECT_CALL(*mockExeNetworkTS.get(), CreateInferRequestImpl(_, _)).WillOnce(Return(mockInferRequestInternal));
sts = plugin->LoadNetwork(exeNetwork, mockNotEmptyNet, {}, &dsc);
ASSERT_EQ((int) StatusCode::OK, sts) << dsc.msg;
#include <gtest/gtest.h>
#include <gmock/gmock-spec-builders.h>
#include <ie_version.hpp>
+#include <cpp/ie_plugin_cpp.hpp>
#include "cpp_interfaces/base/ie_plugin_base.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/mock_plugin_impl.hpp"
const std::map <std::string, std::string> config;
ASSERT_EQ(UNEXPECTED, plugin->SetConfig(config, nullptr));
}
+
+using InferencePluginTests = testing::Test;
+
+TEST_F(InferencePluginTests, throwsOnNullptrCreation) {
+ InferenceEnginePluginPtr nulptr;
+ InferencePlugin plugin;
+ ASSERT_THROW(plugin = InferencePlugin(nulptr), details::InferenceEngineException);
+}
+
+TEST_F(InferencePluginTests, throwsOnUninitializedGetVersion) {
+ InferencePlugin plg;
+ ASSERT_THROW(plg.GetVersion(), details::InferenceEngineException);
+}
+
+TEST_F(InferencePluginTests, throwsOnUninitializedLoadNetwork) {
+ InferencePlugin plg;
+ QueryNetworkResult r;
+ ASSERT_THROW(plg.LoadNetwork(CNNNetwork(), {}), details::InferenceEngineException);
+}
+
+TEST_F(InferencePluginTests, throwsOnUninitializedImportNetwork) {
+ InferencePlugin plg;
+ ASSERT_THROW(plg.ImportNetwork({}, {}), details::InferenceEngineException);
+}
+
+TEST_F(InferencePluginTests, throwsOnUninitializedAddExtension) {
+ InferencePlugin plg;
+ ASSERT_THROW(plg.AddExtension(IExtensionPtr()), details::InferenceEngineException);
+}
+
+TEST_F(InferencePluginTests, throwsOnUninitializedSetConfig) {
+ InferencePlugin plg;
+ ASSERT_THROW(plg.SetConfig({{}}), details::InferenceEngineException);
+}
+
+TEST_F(InferencePluginTests, nothrowsUninitializedCast) {
+ InferencePlugin plg;
+ ASSERT_NO_THROW(auto plgPtr = static_cast<InferenceEnginePluginPtr>(plg));
+}
#include "tests_common.hpp"
#include "unit_test_utils/mocks/mock_allocator.hpp"
-using namespace std;
-
-class LockedMemoryTest : public TestsCommon {
-protected:
- unique_ptr<MockAllocator> createMockAllocator() {
- return unique_ptr<MockAllocator>(new MockAllocator());
- }
-};
-
using namespace InferenceEngine;
using namespace ::testing;
+using LockedMemoryTest = testing::Test;
TEST_F(LockedMemoryTest, canUnlockMemoryAfterUsage) {
-
- auto allocator = createMockAllocator();
-
+ std::unique_ptr<MockAllocator> allocator(new MockAllocator());
char array [] = {1,2,3};
EXPECT_CALL(*allocator.get(), lock((void*)1, _)).WillRepeatedly(Return((void*)array));
}
}
-
TEST_F(LockedMemoryTest, canReadFromLockedMemory) {
-
- auto allocator = createMockAllocator();
-
+ std::unique_ptr<MockAllocator> allocator(new MockAllocator());
char array [] = {1,2,3,4,5};
EXPECT_CALL(*allocator.get(), lock((void*)1, _)).WillRepeatedly(Return((void*)array));
}
}
-
TEST_F(LockedMemoryTest, canWriteToLockedMemory) {
-
- auto allocator = createMockAllocator();
-
+ std::unique_ptr<MockAllocator> allocator(new MockAllocator());
char array [] = {1,2,3,4,5};
EXPECT_CALL(*allocator.get(), lock((void*)1, _)).WillRepeatedly(Return((void*)array));
g_os_iyx_osv16, ///< format used for weights for 2D convolution
g_os_iyx_osv32, ///< format used for weights for 2D convolution
gs_oiyx_gsv16, ///< format used for weights for 2D convolution
+ gs_oizyx_gsv16, ///< format used for weights for 3D convolution
gs_oiyx_gsv32, ///< format used for weights for 2D convolution
g_is_os_zyx_osv16_isv16, ///< format used for grouped weights for blocked 3D deconvolution
g_os_is_yx_osv16_isv4,
g_os_is_zyx_isv8_osv16_isv2,
g_os_is_yx_isv8_osv16_isv2,
g_os_is_zyx_isv16_osv16,
+ g_os_zyx_is_osv16_isv4, ///< format for imad deconvolution
+ g_os_zyx_is_osv16_isv16, ///< format for imad deconvolution
+ g_os_zyx_is_osv16_isv32, ///< format for imad deconvolution
+ g_os_zyx_is_osv32_isv4, ///< format for imad deconvolution
+ g_os_zyx_is_osv32_isv16, ///< format for imad deconvolution
+ g_os_zyx_is_osv32_isv32, ///< format for imad deconvolution
format_num, ///< number of format types
any = -1
{ os_is_yx_osv32_isv32p, { 1, 1, 1, 0, 0, "bfxy", "bfxy?", {}}},
{ os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 16}, {1, 16}}}},
{ is_os_zyx_osv16_isv16, { 1, 1, 3, 0, 0, "fbzyx", "bfxyz", {{0, 16}, {1, 16}}}},
+ { is_os_yx_osv16_isv16, { 1, 1, 2, 0, 0, "fbyx", "bfxyz", {{0, 16}, {1, 16}}}},
{ os_is_osv32_isv32_swizzled_by_4, { 1, 1, 0, 0, 0, "bfxy", "bfxy?", {{0, 32}, {1, 32}}}},
{ os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{1, 8}, {0, 16}, {1, 2}}}},
{ os_zyxi_osv16, { 1, 1, 3, 0, 0, "bzyxf", "bfxyz", {{0, 16}}}},
{ g_os_iyx_osv16, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{0, 16}}}},
{ g_os_iyx_osv32, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{0, 32}}}},
{ gs_oiyx_gsv16, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{8, 16}}}},
+ { gs_oizyx_gsv16, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{8, 16}}}},
{ gs_oiyx_gsv32, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{8, 32}}}},
{ gyxio, { 1, 1, 2, 0, 1, "gyxfb", "bfxy????g", {}}},
{ g_is_os_zyx_osv16_isv16, { 1, 1, 3, 0, 1, "gfbzyx", "bfxyz???g", {{0, 16}, {1, 16}}}},
{ g_os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{1, 8}, {0, 16}, {1, 2}}}},
{ g_os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{0, 16}, {1, 16}}}},
{ g_os_is_yx_osv16_isv4, { 1, 1, 2, 0, 1, "gbfxy", "bfxy????g", {{0, 16}, {1, 4}}}},
+ { g_os_zyx_is_osv16_isv4, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 16}, {1, 4}}}},
+ { g_os_zyx_is_osv16_isv16, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 16}, {1, 16}}}},
+ { g_os_zyx_is_osv16_isv32, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 16}, {1, 32}}}},
+ { g_os_zyx_is_osv32_isv4, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 32}, {1, 4}}}},
+ { g_os_zyx_is_osv32_isv16, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 32}, {1, 16}}}},
+ { g_os_zyx_is_osv32_isv32, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 32}, {1, 32}}}},
};
return traits.at(fmt);
}
{ WeightsLayout::gs_oi_yxs_gsv16_yxsv4, { 0, 1, -1, 2, 3, -1, -1, 4 } },
{ WeightsLayout::gs_oi_yxs_gsv32_yxsv4, { 0, 1, -1, 2, 3, -1, -1, 4 } },
{ WeightsLayout::g_os_is_yx_osv16_isv4, { 0, 1, -1, 2, 3, -1, -1, 4 } },
+ { WeightsLayout::g_os_zyx_is_osv16_isv4, { 1, 2, 3, 0, 4, -1, -1, 5 } },
+ { WeightsLayout::g_os_zyx_is_osv16_isv16, { 1, 2, 3, 0, 4, -1, -1, 5 } },
+ { WeightsLayout::g_os_zyx_is_osv16_isv32, { 1, 2, 3, 0, 4, -1, -1, 5 } },
+ { WeightsLayout::g_os_zyx_is_osv32_isv4, { 1, 2, 3, 0, 4, -1, -1, 5 } },
+ { WeightsLayout::g_os_zyx_is_osv32_isv16, { 1, 2, 3, 0, 4, -1, -1, 5 } },
+ { WeightsLayout::g_os_zyx_is_osv32_isv32, { 1, 2, 3, 0, 4, -1, -1, 5 } },
}};
NDims DataTensor::GetSimpleDims(const std::vector<size_t>& d, DataLayout l) {
newDims[2] = RoundUp(newDims[2], 4);
newDims[3] = RoundUp(newDims[3], 16);
break;
+ case g_os_zyx_is_osv16_isv4:
+ newDims[0] = RoundUp(newDims[0], 4);
+ newDims[4] = RoundUp(newDims[4], 16);
+ break;
+ case g_os_zyx_is_osv16_isv16:
+ newDims[0] = RoundUp(newDims[0], 16);
+ newDims[4] = RoundUp(newDims[4], 16);
+ break;
+ case g_os_zyx_is_osv16_isv32:
+ newDims[0] = RoundUp(newDims[0], 32);
+ newDims[4] = RoundUp(newDims[4], 16);
+ break;
+ case g_os_zyx_is_osv32_isv4:
+ newDims[0] = RoundUp(newDims[0], 4);
+ newDims[4] = RoundUp(newDims[4], 32);
+ break;
+ case g_os_zyx_is_osv32_isv16:
+ newDims[0] = RoundUp(newDims[0], 16);
+ newDims[4] = RoundUp(newDims[4], 32);
+ break;
+ case g_os_zyx_is_osv32_isv32:
+ newDims[0] = RoundUp(newDims[0], 32);
+ newDims[4] = RoundUp(newDims[4], 32);
+ break;
default:
break;
}
vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v;
vec[Channelndex(l, WeightsChannelName::LX)] = LX().v;
vec[Channelndex(l, WeightsChannelName::LY)] = LY().v;
+ } else if (src_channels == 4 && dst_channels == 5) {
+ vec[Channelndex(l, WeightsChannelName::X)] = X().v;
+ vec[Channelndex(l, WeightsChannelName::Y)] = Y().v;
+ vec[Channelndex(l, WeightsChannelName::Z)] = 1;
+ vec[Channelndex(l, WeightsChannelName::IFM)] = IFM().v;
+ vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v;
} else {
assert(0);
}
gs_oi_yxs_gsv32_yxsv4, // grouped weights for depthwise IMAD convolution (b_fs_yx_fsv32 format)
g_os_is_yx_osv16_isv4,
+
+ g_os_zyx_is_osv16_isv4,
+ g_os_zyx_is_osv16_isv16,
+ g_os_zyx_is_osv16_isv32,
+ g_os_zyx_is_osv32_isv4,
+ g_os_zyx_is_osv32_isv16,
+ g_os_zyx_is_osv32_isv32,
+
WeightsLayoutCount // NUMBER OF ELEMENTS IN ENUM
};
}
}
-inline bool GroupedLayout(WeightsLayout l) {
- switch (l) {
- case WeightsLayout::goiyx:
- case WeightsLayout::goizyx:
- case WeightsLayout::g_os_iyx_osv16:
- case WeightsLayout::g_os_iyx_osv32:
- case WeightsLayout::gs_oiyx_gsv16:
- case WeightsLayout::gs_oizyx_gsv16:
- case WeightsLayout::gs_oiyx_gsv32:
- case WeightsLayout::g_os_iyx_osv16_rotate_180:
- case WeightsLayout::gyxio:
- case WeightsLayout::gi_yxs_os_yxsv2_osv16:
- case WeightsLayout::g_is_os_zyx_osv16_isv16:
- case WeightsLayout::g_is_os_yx_osv16_isv16:
- case WeightsLayout::g_os_is_zyx_isv8_osv16_isv2:
- case WeightsLayout::g_os_is_yx_isv8_osv16_isv2:
- case WeightsLayout::g_os_is_zyx_isv16_osv16:
- case WeightsLayout::giy_xs_os_xsv2_osv16__ao32:
- case WeightsLayout::giy_xs_os_xsv2_osv8__ao32:
- case WeightsLayout::gs_oi_yxs_gsv4_yxsv4:
- case WeightsLayout::g_os_is_yx_osv16_isv4:
- return true;
- default:
- return false;
- }
-}
+inline bool GroupedLayout(WeightsLayout l);
inline bool GroupedLayout(DataLayout) {
return false;
static WeightsChannelArray weightsChannelArray;
static NDims GetSimpleDims(const std::vector<size_t>& d, WeightsLayout l);
};
+
+inline bool GroupedLayout(WeightsLayout l) {
+ return WeightsTensor::DoesGroupDimExist(l);
+}
+
} // namespace Tensor
} // namespace kernel_selector
DispatchData runInfo;
std::vector<size_t> global;
std::vector<size_t> local;
- if (out.GetLayout() == DataLayout::bfzyx) {
- global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
- local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo);
- } else if (out.GetLayout() == DataLayout::yxfb) {
+ if (out.GetLayout() == DataLayout::yxfb) {
global = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v};
local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo);
} else if (out.GetLayout() == DataLayout::b_fs_yx_fsv16) {
global = {Align(out.Feature().v, 16) * out.Batch().v, out.X().v, out.Y().v};
local = {16, 1, 1};
} else {
- global = {out.X().v, out.Y().v, out.Feature().v * out.Batch().v};
+ global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo);
}
o.GetType() != KernelType::ACTIVATION) {
return false;
}
+ const activation_params& orgParams = static_cast<const activation_params&>(p);
+
+ for (auto& fused_op : orgParams.fused_ops) {
+ if (!IsFusedPrimitiveSupported(fused_op))
+ return false;
+ }
return true;
}
k.EnableOutputDataType(Datatype::INT32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
+ k.EnableDifferentTypes();
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableTensorOffset();
return false;
}
- if (params.gradient) {
- if (params.inputs[0].GetLayout() != params.inputs[1].GetLayout())
- return false;
- }
+ if (params.output.GetLayout() != params.inputs[0].GetLayout())
+ return false;
- // Opt kernel supports fused activations without extra inputs, since
- // it can't calculate correct offset for tensors with different layout.
- for (auto& op : params.fused_ops) {
- if (!op.tensors.empty()) {
- for (auto& t : op.tensors) {
- if (!(t == params.inputs[0]))
- return false;
- }
- }
- }
+ if (!params.fused_ops.empty() && params.output.GetLayout() != DataLayout::bfyx &&
+ params.output.GetLayout() != DataLayout::bfzyx)
+ return false;
return true;
}
JitConstants ActivationKernelOpt::GetJitConstants(const activation_params& params, DispatchData kd) const {
auto jit = ActivationKernelBase::GetJitConstants(params, kd);
+ auto input_dt = params.inputs[0].GetDType();
jit.AddConstant(MakeJitConstant("NUM_COLS_WI", NUM_COLS_WI));
if (!params.fused_ops.empty()) {
- auto input_dt = GetUnitType(params);
- FusedOpsConfiguration conf = {"", {"x"}, "v", input_dt, 4, LoadType::LT_UNALIGNED, BoundaryCheck::DISABLED, IndexType::LINEAR_OFFSET };
+ std::vector<std::string> idx_order;
+ if (params.inputs[0].GetDims().size() <= 4) {
+ idx_order = {"fo_b", "fo_f", "fo_y", "fo_x"};
+ } else if (params.inputs[0].GetDims().size() == 5) {
+ idx_order = {"fo_b", "fo_f", "fo_z", "fo_y", "fo_x"};
+ }
+ FusedOpsConfiguration conf =
+ {"", idx_order, "v", input_dt, 4, LoadType::LT_UNALIGNED, BoundaryCheck::DISABLED, IndexType::TENSOR_COORD};
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
}
+ jit.Merge(MakeActivationJitConstants(params.activations, input_dt, "_KERNEL"));
return jit;
}
#include "activation_kernel_base.h"
+#include <vector>
+
namespace kernel_selector {
class ActivationKernelOpt : public ActivationKernelBase {
public:
DispatchData SetDefault(const activation_params& arg) const override;
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override;
+ std::vector<FusedOpType> GetSupportedFusedOps() const override {
+ return { FusedOpType::QUANTIZE };
+ }
};
} // namespace kernel_selector
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
+ k.EnableDifferentTypes();
k.EnableActivationAdditionalParamsAsInput();
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
JitConstants ActivationKernelRef::GetJitConstants(const activation_params& params, DispatchData kd) const {
auto jit = ActivationKernelBase::GetJitConstants(params, kd);
+ auto input_dt = params.inputs[0].GetDType();
if (!params.fused_ops.empty()) {
- auto input_dt = GetUnitType(params);
- FusedOpsConfiguration conf = {"", {"batch", "feature", "y", "x"}, "dst", input_dt, 1 };
+ std::vector<std::string> idx_order;
+ if (params.inputs[0].GetDims().size() <= 4) {
+ idx_order = {"batch", "feature", "y", "x"};
+ } else if (params.inputs[0].GetDims().size() == 5) {
+ idx_order = {"batch", "feature", "z", "y", "x"};
+ }
+ FusedOpsConfiguration conf = {"", idx_order, "dst", input_dt, 1};
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
}
+ jit.Merge(MakeActivationJitConstants(params.activations, input_dt, "_KERNEL"));
return jit;
}
#include "activation_kernel_base.h"
+#include <vector>
+
namespace kernel_selector {
class ActivationKernelRef : public ActivationKernelBase {
public:
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override;
+ std::vector<FusedOpType> GetSupportedFusedOps() const override {
+ return { FusedOpType::QUANTIZE };
+ }
};
} // namespace kernel_selector
output.X().v != input.X().v || output.Y().v != input.Y().v || output.Feature().v % 16 != 0;
const bool bFilterSize = params.filterSize.x != 1 || params.filterSize.y != 1;
const bool bStride = params.stride.x != 1 || params.stride.y != 1;
+ const bool bPadding = input.Feature().pad.before % feature_block_size != 0 || output.Feature().pad.before % feature_block_size != 0;
- if (bOutputSizes || bFilterSize || bStride) {
+ if (bOutputSizes || bFilterSize || bStride || bPadding) {
return false;
}
if (cp.inputs[0].Feature().v != cp.groups || cp.output.Feature().v != cp.groups)
return false;
+ // Check that padding features doesn't miss-align the blocks
+ if (cp.inputs[0].Feature().pad.before % feature_block_size != 0 || cp.output.Feature().pad.before % feature_block_size != 0)
+ return false;
+
return true;
}
if (is_vector)
idx_order = {"(mb)", "(oc*OC_BLOCK + g*OC)", "od", "oh", "(ow + " + std::to_string(conf_id * 8) + ")"};
else
- idx_order = {"(mb)", "(oc*OC_BLOCK + g*OC + local_id)", "od", "oh", "(ow + i)"};
+ idx_order = {"(mb)", "(oc*OC_BLOCK + g*OC + local_id)", "od", "oh", "(ow + " + std::to_string(conf_id * 8) + " + i)"};
return { suffix,
idx_order,
GetPreferredWeightsLayout(newParams),
kd.weightsReorderParams,
GetSupportedKey(),
- newParams.groups);
+ newParams.groups,
+ newParams.transposed);
if (!succeed) {
return {};
k.EnableBatching();
k.EnableSplitSupport();
k.EnableDilation();
- k.EnableTranspose();
k.EnableGroupedConvolution();
return k;
}
WeightsLayout ConvolutionKernel_bfyx_os_iyx_osv16::GetPreferredWeightsLayout(
const convolution_params ¶ms) const {
- if (!params.transposed) {
- return (params.groups > 1) ? WeightsLayout::g_os_iyx_osv16 : WeightsLayout::os_iyx_osv16;
- } else {
- return (params.groups > 1) ? WeightsLayout::g_os_iyx_osv16_rotate_180 : WeightsLayout::os_iyx_osv16_rotate_180;
- }
+ return (params.groups > 1) ? WeightsLayout::g_os_iyx_osv16 : WeightsLayout::os_iyx_osv16;
}
KernelsData ConvolutionKernel_bfyx_os_iyx_osv16::GetKernelsData(const Params& params,
k.EnableBatching();
k.EnableSplitSupport();
k.EnableDilation();
- k.EnableTranspose();
return k;
}
WeightsLayout ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::GetPreferredWeightsLayout(
const convolution_params ¶ms) const {
- if (!params.transposed) {
- return WeightsLayout::os_iyx_osv16;
- } else {
- return WeightsLayout::os_iyx_osv16_rotate_180;
- }
+ return params.groups == 1 ? WeightsLayout::os_iyx_osv16 : WeightsLayout::g_os_iyx_osv16;
}
KernelsData ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::GetKernelsData(const Params& params,
return res;
}
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
}
auto& newParams = static_cast<const convolution_params&>(params);
- if ((newParams.inputs[0].Feature().v / newParams.groups) % 4 != 0)
+ if (newParams.groups > 1 && newParams.weights.IFM().v % 4 != 0)
return false;
size_t min_block_size_x = (newParams.weights.X().v - 1) * newParams.dilation.x + 1;
k.EnableDepthwiseSeparableOpt();
}
- if (transposed) {
- k.EnableTranspose();
- }
-
if (local_convolution) {
k.EnableLocalConvolution();
}
const auto& out = params.output;
+ bool ver_bsv16_fsv16 = params.output.GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv16
+ || params.output.GetLayout() == DataLayout::bs_fs_zyx_bsv16_fsv16;
+
auto x = out.X().v;
auto y = out.Y().v;
auto z = out.Z().v;
auto f = Align(out.Feature().v, 16);
auto b = out.Batch().v;
- if (out.Batch().v % 16 == 0) {
+ if (ver_bsv16_fsv16) {
if (params.depthwise_separable_opt) {
kd.gws0 = x * y * z;
kd.gws1 = f;
if (!DeconvolutionKernelBase::Validate(p, o)) {
return false;
}
+ auto& deconv_params = static_cast<const deconvolution_params&>(p);
+
+ if (deconv_params.output.GetLayout() != deconv_params.inputs[0].GetLayout())
+ return false;
+
+ const auto& params = static_cast<const deconvolution_params&>(p);
+ const auto feature_block_size = 16;
+
+ // Check that padding features doesn't miss-align the blocks
+ if (params.inputs[0].Feature().pad.before % feature_block_size != 0 || params.output.Feature().pad.before % feature_block_size != 0)
+ return false;
return true;
}
auto output = params.output;
auto jit = Parent::GetJitConstants(params);
- if (output.Batch().v % 16 == 0) {
+ bool ver_bsv16_fsv16 = params.output.GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv16
+ || params.output.GetLayout() == DataLayout::bs_fs_zyx_bsv16_fsv16;
+
+ if (ver_bsv16_fsv16) {
jit.AddConstant(MakeJitConstant("VER_16MB16C", 1));
} else {
jit.AddConstant(MakeJitConstant("VER_8OW16C", 1));
icb /= 2;
}
- if (output.Batch().v % 16 == 0) {
+ if (ver_bsv16_fsv16) {
mb_block = 16;
jit.AddConstant(MakeJitConstant("MB_BLOCK", mb_block));
jit.AddConstant(MakeJitConstant("IC_BLOCK", ic_block));
jit.AddConstant(MakeJitConstant("LWS_1", runInfo.lws1));
jit.AddConstant(MakeJitConstant("LWS_2", runInfo.lws2));
+ if (!params.fused_ops.empty()) {
+ auto fused_dt = GetActivationType(params);
+ std::vector<std::string> idx_order_block_c00;
+ std::vector<std::string> idx_order_block_c01;
+ std::vector<std::string> idx_order_block_ci;
+
+ if (params.output.Dimentions() <= 4) {
+ idx_order_block_c00 = { "mb", "(g * IC + gic * IC_BLOCK)", "ih", "iw" };
+ idx_order_block_c01 = { "(mb + 8)", "(g * IC + gic * IC_BLOCK)", "ih", "iw" };
+ idx_order_block_ci = { "mb", "(g * IC + gic * IC_BLOCK)", "ih", "(iw + i)" };
+ } else {
+ idx_order_block_c00 = { "mb", "(g * IC + gic * IC_BLOCK)", "id", "ih", "iw" };
+ idx_order_block_c01 = { "(mb + 8)", "(g * IC + gic * IC_BLOCK)", "id", "ih", "iw" };
+ idx_order_block_ci = { "mb", "(g * IC + gic * IC_BLOCK)", "id", "ih", "(iw + i)" };
+ }
+
+ FusedOpsConfiguration conf_c00 = {
+ "_BLOCK_C00",
+ idx_order_block_c00,
+ "blockC00",
+ fused_dt,
+ 8,
+ LoadType::LT_ALIGNED_READ,
+ BoundaryCheck::ENABLED,
+ IndexType::TENSOR_COORD,
+ Tensor::DataChannelName::BATCH };
+ FusedOpsConfiguration conf_c01 = {
+ "_BLOCK_C01",
+ idx_order_block_c01,
+ "blockC01",
+ fused_dt,
+ 8,
+ LoadType::LT_ALIGNED_READ,
+ BoundaryCheck::ENABLED,
+ IndexType::TENSOR_COORD,
+ Tensor::DataChannelName::BATCH };
+ FusedOpsConfiguration conf_ci = { "_BLOCK_CI", idx_order_block_ci, "blockC00[i]", fused_dt, 1, LoadType::LT_ALIGNED_READ };
+
+ jit.Merge(MakeFusedOpsJitConstants(params, { conf_c00, conf_c01, conf_ci }));
+ }
+
return jit;
}
protected:
WeightsLayout GetPreferredWeightsLayout(const deconvolution_params& p) const override {
- if (p.output.GetLayout() == DataLayout::b_fs_yx_fsv16)
+ if (p.output.Dimentions() == 4)
return WeightsLayout::is_os_yx_osv16_isv16;
else
return WeightsLayout::is_os_zyx_osv16_isv16;
bool Validate(const Params& p, const optional_params& o) const override;
CommonDispatchData SetDefault(const deconvolution_params& arg) const override;
JitConstants GetJitConstants(const deconvolution_params& params) const override;
+
+ std::vector<FusedOpType> GetSupportedFusedOps() const override {
+ return {
+ FusedOpType::ACTIVATION,
+ FusedOpType::ELTWISE,
+ FusedOpType::SCALE,
+ FusedOpType::QUANTIZE
+ };
+ }
};
} // namespace kernel_selector
#include "kernel_selector_utils.h"
#include <algorithm>
+#include <vector>
+#include <string>
+#include <iostream>
namespace kernel_selector {
static const size_t sub_group_size = 16;
static const size_t feature_block_size = 16;
+static const float max_reg_pressure = 3.f / 4.f;
-size_t DeconvolutionKernel_b_fs_zyx_fsv16_dw::GetBlockSizeX(const deconvolution_params& params) const {
- std::vector<size_t> blockWidths = {8, 4, 2, 1};
- for (auto& blockSize : blockWidths)
- if (params.output.X().v % blockSize == 0) {
- return blockSize;
+float DeconvolutionKernel_b_fs_zyx_fsv16_dw::EstimateRegPressure(const deconvolution_params& params,
+ const dispatch_params& d_params) const {
+ size_t usage_bytes = 0;
+
+ usage_bytes += d_params.block_size_x * BytesPerElement(GetAccumulatorType(params));
+
+ if (d_params.preload_weights == weights_preload::all) {
+ usage_bytes += params.weights.X().v * params.weights.Y().v * params.weights.Z().v * BytesPerElement(params.weights.GetDType());
+ } else if (d_params.preload_weights == weights_preload::line) {
+ usage_bytes += params.weights.X().v * BytesPerElement(params.weights.GetDType());
+ } else {
+ usage_bytes += BytesPerElement(params.weights.GetDType());
+ }
+
+ if (d_params.preload_input == input_preload::line) {
+ size_t input_line_size = CeilDiv(d_params.block_size_x + params.weights.X().v - 1, params.stride.x);
+ usage_bytes += input_line_size * BytesPerElement(params.inputs[0].GetDType());
+ } else {
+ usage_bytes += BytesPerElement(params.inputs[0].GetDType());
+ }
+
+ constexpr size_t register_num = 128;
+ constexpr size_t register_bytes = 32;
+ constexpr size_t max_register_bytes = register_num * register_bytes;
+
+ return static_cast<float>(usage_bytes * sub_group_size) / static_cast<float>(max_register_bytes);
+}
+
+DeconvolutionKernel_b_fs_zyx_fsv16_dw::dispatch_params
+DeconvolutionKernel_b_fs_zyx_fsv16_dw::GetDispatchParams(const deconvolution_params& params) const {
+ std::vector<dispatch_params> ordered_params;
+ if (params.inputs[0].GetDType() == Datatype::F16 || params.inputs[0].GetDType() == Datatype::F32) {
+ ordered_params = {
+ // Preload weights
+ dispatch_params{8, input_preload::none, weights_preload::all},
+ dispatch_params{4, input_preload::none, weights_preload::all},
+ dispatch_params{2, input_preload::none, weights_preload::all},
+ dispatch_params{1, input_preload::none, weights_preload::all},
+ // No preloading
+ dispatch_params{8, input_preload::none, weights_preload::none},
+ dispatch_params{4, input_preload::none, weights_preload::none},
+ dispatch_params{2, input_preload::none, weights_preload::none},
+ dispatch_params{1, input_preload::none, weights_preload::none},
+ };
+ } else {
+ ordered_params = {
+ dispatch_params{16, input_preload::line, weights_preload::line},
+ dispatch_params{8, input_preload::line, weights_preload::line},
+ dispatch_params{4, input_preload::line, weights_preload::line},
+ dispatch_params{16, input_preload::line, weights_preload::none},
+ dispatch_params{8, input_preload::line, weights_preload::none},
+ dispatch_params{4, input_preload::line, weights_preload::none},
+ dispatch_params{2, input_preload::line, weights_preload::line},
+ dispatch_params{2, input_preload::line, weights_preload::none},
+ dispatch_params{1, input_preload::line, weights_preload::none},
+ dispatch_params{1, input_preload::none, weights_preload::none},
+ };
+ }
+
+ dispatch_params best_params = dispatch_params{ 1, input_preload::none, weights_preload::none };
+
+ for (auto& d_params : ordered_params) {
+ bool good_block_size_x = params.output.X().v % d_params.block_size_x == 0 || params.output.X().v > d_params.block_size_x * 3;
+ bool good_reg_pressure = EstimateRegPressure(params, d_params) <= max_reg_pressure;
+ // No support for no input preload and weights line preload in kernel
+ bool good_preloads = !(d_params.preload_input == input_preload::none && d_params.preload_weights == weights_preload::line);
+ // At least one input preload
+ bool full_input_preload = d_params.preload_input != input_preload::line ||
+ CeilDiv(d_params.block_size_x + params.filterSize.x - 1, params.stride.x) <= params.inputs[0].X().v;
+
+ if (good_block_size_x && good_reg_pressure && good_preloads && full_input_preload) {
+ best_params = d_params;
+ break;
}
- return 1;
+ }
+
+ return best_params;
}
ParamsKey DeconvolutionKernel_b_fs_zyx_fsv16_dw::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F32);
- k.EnableOutputDataType(Datatype::F32);
- k.EnableInputWeightsType(WeightsType::F32);
k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+
+ k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
- k.EnableInputWeightsType(WeightsType::F16);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+
+ k.EnableAllInputWeightsType();
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableSubGroup();
k.EnableSubGroupShort();
k.EnableGroupedConvolution();
+ k.EnableDifferentTypes();
+ k.EnableDifferentInputWeightsTypes();
return k;
}
auto f = out.Feature().v;
auto b = out.Batch().v;
- kd.gws0 = (x / GetBlockSizeX(params)) * y * z;
+ kd.gws0 = CeilDiv(x, GetDispatchParams(params).block_size_x) * y * z;
kd.gws1 = Align(f, feature_block_size);
kd.gws2 = b;
return false;
}
- const deconvolution_params& params = static_cast<const deconvolution_params&>(p);
+ const auto& params = static_cast<const deconvolution_params&>(p);
if (params.groups == 1)
return false;
if (params.weights.IFM().v != 1 || params.weights.OFM().v != 1)
return false;
+ // Check that padding features doesn't miss-align the blocks
+ if (params.inputs[0].Feature().pad.before % feature_block_size != 0 || params.output.Feature().pad.before % feature_block_size != 0)
+ return false;
+
return true;
}
auto output = params.output;
auto jit = Parent::GetJitConstants(params);
- jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", GetBlockSizeX(params)));
+ auto dp = GetDispatchParams(params);
+ auto& block_size_x = dp.block_size_x;
+
+ jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", block_size_x));
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size));
if (params.output.Feature().v % feature_block_size != 0) {
jit.AddConstant(MakeJitConstant("OUTPUT_LEFTOVERS", params.output.Feature().v % feature_block_size));
}
+ jit.AddConstant(MakeJitConstant("INPUT_BLOCK_SIZE_X", CeilDiv(block_size_x + params.filterSize.x - 1, params.stride.x)));
+ jit.AddConstant(MakeJitConstant("PRELOAD_INPUT_LINE", dp.preload_input == input_preload::line));
+ jit.AddConstant(MakeJitConstant("PRELOAD_WEIGHTS", dp.preload_weights == weights_preload::all));
+ jit.AddConstant(MakeJitConstant("PRELOAD_WEIGHTS_LINE", dp.preload_weights == weights_preload::line));
+
+ if (!params.fused_ops.empty()) {
+ auto fused_dt = GetActivationType(params);
+ std::vector<std::string> idx_order;
+ if (params.output.Dimentions() <= 4) {
+ idx_order = {"b", "fg", "y", "x"};
+ } else {
+ idx_order = { "b", "fg", "z", "y", "x" };
+ }
+ auto boundary_check = BoundaryCheck::ENABLED;
+ if (params.output.Feature().v % feature_block_size == 0 && params.output.X().v % block_size_x == 0) {
+ boundary_check = BoundaryCheck::DISABLED;
+ }
+ FusedOpsConfiguration conf = {
+ "",
+ idx_order,
+ "dequantized",
+ fused_dt,
+ block_size_x,
+ LoadType::LT_ALIGNED_READ,
+ boundary_check,
+ IndexType::TENSOR_COORD,
+ Tensor::DataChannelName::X };
+ jit.Merge(MakeFusedOpsJitConstants(params, { conf }));
+ }
return jit;
}
bool Validate(const Params& p, const optional_params& o) const override;
CommonDispatchData SetDefault(const deconvolution_params& arg) const override;
JitConstants GetJitConstants(const deconvolution_params& params) const override;
- size_t GetBlockSizeX(const deconvolution_params& params) const;
+
+ enum class weights_preload {
+ none,
+ line,
+ all
+ };
+ enum class input_preload {
+ none,
+ line
+ };
+
+ struct dispatch_params {
+ size_t block_size_x;
+ input_preload preload_input;
+ weights_preload preload_weights;
+ };
+ dispatch_params GetDispatchParams(const deconvolution_params& params) const;
+ float EstimateRegPressure(const deconvolution_params& params, const dispatch_params& disp_params) const;
+
+ std::vector<FusedOpType> GetSupportedFusedOps() const override {
+ return {
+ FusedOpType::ACTIVATION,
+ FusedOpType::ELTWISE,
+ FusedOpType::SCALE,
+ FusedOpType::QUANTIZE
+ };
+ }
};
} // namespace kernel_selector
return false;
}
+ for (auto& fused_op : params.fused_ops) {
+ if (!IsFusedPrimitiveSupported(fused_op))
+ return false;
+ }
+
return true;
}
(dp.filterSize.y - 1 + padding.y) * input.Y().pitch;
input_offset_with_padding = std::max(input_offset_with_padding, (int64_t)0);
- jit.AddConstants({MakeJitConstant("STRIDE", dp.stride),
- MakeJitConstant("PADDING", dp.padding),
- MakeJitConstant("DILATION", dp.dilation),
- MakeJitConstant("FILTER_ARRAY_NUM", dp.split),
- MakeJitConstant("INPUT0_OFFSET_WITH_PADDING", input_offset_with_padding),
- MakeJitConstant("DEPTHWISE_SEPARABLE_OPT", dp.depthwise_separable_opt),
- MakeJitConstant("FUSED_ELTWISE", dp.fused_eltwise),
- MakeJitConstant("GROUPED", (dp.groups > 1) ? 1 : 0)});
+ jit.AddConstants({ MakeJitConstant("STRIDE", dp.stride),
+ MakeJitConstant("PADDING", dp.padding),
+ MakeJitConstant("DILATION", dp.dilation),
+ MakeJitConstant("FILTER_ARRAY_NUM", dp.split),
+ MakeJitConstant("INPUT0_OFFSET_WITH_PADDING", input_offset_with_padding),
+ MakeJitConstant("DEPTHWISE_SEPARABLE_OPT", dp.depthwise_separable_opt),
+ MakeJitConstant("GROUPED", (dp.groups > 1) ? 1 : 0) });
+ jit.Merge(MakeTypeJitConstants(GetAccumulatorType(dp), "ACCUMULATOR"));
+ jit.Merge(MakeTypeJitConstants(GetActivationType(dp), "ACTIVATION"));
return jit;
}
KernelData kd = KernelData::Default<deconvolution_params>(params);
deconvolution_params& newParams = *static_cast<deconvolution_params*>(kd.params.get());
- bool succeed = UpdateWeightsParams(newParams, options, GetPreferredWeightsLayout(newParams), kd.weightsReorderParams);
+ bool succeed = UpdateWeightsParams(newParams,
+ options,
+ GetPreferredWeightsLayout(newParams),
+ kd.weightsReorderParams,
+ GetSupportedKey(),
+ newParams.groups);
if (!succeed) {
return {};
entry_point,
DEFAULT,
true,
- !newParams.bias.empty());
+ !newParams.bias.empty(),
+ 1,
+ GetFusedPrimitiveInputsCount(params));
kernel.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0});
- if (orgParams.fused_eltwise)
- kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
kd.estimatedTime = runInfo.efficiency;
return {kd};
}
-} // namespace kernel_selector
\ No newline at end of file
+
+// Selects the datatype used to accumulate partial sums inside deconvolution kernels.
+Datatype DeconvolutionKernelBase::GetAccumulatorType(const deconvolution_params& params) const {
+    // Quantized (u)int8 inputs accumulate into int32 to avoid 8-bit overflow.
+    if (params.inputs[0].GetDType() == Datatype::INT8 || params.inputs[0].GetDType() == Datatype::UINT8)
+        return Datatype::INT32;
+
+    // input is either fp32 or fp16
+    // for fp32->fp16 accumulate to fp16, otherwise accumulate to input type
+    if (params.output.GetDType() == Datatype::F16)
+        return Datatype::F16;
+
+    return params.inputs[0].GetDType();
+}
+
+// Selects the datatype in which activations and fused operations are computed.
+Datatype DeconvolutionKernelBase::GetActivationType(const deconvolution_params& params) const {
+    auto accumulator_dt = GetAccumulatorType(params);
+    // Integer accumulation (quantized path) is converted to fp32 before activation;
+    // floating-point paths keep the accumulator type.
+    if (accumulator_dt == Datatype::INT32)
+        return Datatype::F32;
+    return accumulator_dt;
+}
+
+} // namespace kernel_selector
uint32_t split = 1;
uint32_t groups = 1;
bool depthwise_separable_opt = false;
- bool fused_eltwise = false;
std::string to_string() const override;
return (params.groups > 1) ? WeightsLayout::goizyx : WeightsLayout::oizyx;
}
bool Validate(const Params& p, const optional_params& o) const override;
+
+ virtual Datatype GetAccumulatorType(const deconvolution_params& params) const;
+ virtual Datatype GetActivationType(const deconvolution_params& params) const;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
kd.efficiency = FORCE_PRIORITY_6;
return kd;
}
-} // namespace kernel_selector
\ No newline at end of file
+
+// Extends the base jit constants with a fused-ops configuration for the bfyx_opt kernel.
+JitConstants DeconvolutionKernel_bfyx_opt::GetJitConstants(const deconvolution_params& params) const {
+ auto jit = Parent::GetJitConstants(params);
+
+ if (!params.fused_ops.empty()) {
+ // Fused ops are computed in the activation datatype (fp32 for quantized paths).
+ auto fused_dt = GetActivationType(params);
+ // Index order must match the index variables used inside deconvolution_gpu_bfyx_opt.cl.
+ FusedOpsConfiguration conf = {
+ "",
+ {"batch_offset", "ofm_offset", "id_y", "id_x"},
+ "result",
+ fused_dt,
+ 1,
+ LoadType::LT_UNALIGNED,
+ BoundaryCheck::DISABLED };
+ jit.Merge(MakeFusedOpsJitConstants(params, { conf }));
+ }
+ return jit;
+}
+
+} // namespace kernel_selector
class DeconvolutionKernel_bfyx_opt : public DeconvolutionKernelBase {
public:
+ using Parent = DeconvolutionKernelBase;
DeconvolutionKernel_bfyx_opt() : DeconvolutionKernelBase("deconvolution_gpu_bfyx_opt") {}
virtual ~DeconvolutionKernel_bfyx_opt() {}
protected:
CommonDispatchData SetDefault(const deconvolution_params& params) const override;
+ JitConstants GetJitConstants(const deconvolution_params& params) const override;
+
+ std::vector<FusedOpType> GetSupportedFusedOps() const override {
+ return {
+ FusedOpType::ACTIVATION,
+ FusedOpType::ELTWISE,
+ FusedOpType::SCALE,
+ FusedOpType::QUANTIZE
+ };
+ }
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "deconvolution_kernel_imad_along_f_tile_bfx.hpp"
+
+#include "kernel_selector_utils.h"
+
+#include <algorithm>
+#include <iostream>
+#include <map>
+#include <string>
+#include <tuple>
+#include <vector>
+
+namespace kernel_selector {
+
+namespace {
+ constexpr size_t simd = 16;
+}
+
+// Describes the data types, layouts and features this implementation supports.
+ParamsKey DeconvolutionKernel_imad_along_f_tile_bfx::GetSupportedKey() const {
+ ParamsKey k;
+ // imad operates on quantized 8-bit inputs and weights; output may be dequantized.
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+ k.EnableInputWeightsType(WeightsType::INT8);
+ k.EnableInputWeightsType(WeightsType::UINT8);
+
+ // Feature-blocked layouts, 2D and 3D, fsv16 variants:
+ k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
+ k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
+ k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
+ k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
+
+ // fsv32 variants:
+ k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
+ k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
+ k.EnableInputLayout(DataLayout::b_fs_zyx_fsv32);
+ k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv32);
+
+ // Batch + feature double-blocked variants:
+ k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
+ k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
+ k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
+ k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
+
+ k.EnableInputLayout(DataLayout::byxf_af32);
+ k.EnableOutputLayout(DataLayout::byxf_af32);
+
+ k.EnableDifferentTypes();
+ k.EnableDifferentInputWeightsTypes();
+ k.EnableBatching();
+ k.EnableTensorPitches();
+ k.EnableTensorOffset();
+ k.EnableBiasPerFeature();
+ k.EnableNonBiasTerm();
+ k.EnableGroupedConvolution();
+
+ return k;
+}
+
+// Rejects parameter combinations the tiled imad kernel cannot handle,
+// on top of the checks performed by the common base class.
+bool DeconvolutionKernel_imad_along_f_tile_bfx::Validate(const Params& p, const optional_params& o) const {
+    if (!Parent::Validate(p, o))
+        return false;
+
+    auto& params = static_cast<const deconvolution_params&>(p);
+    if (params.groups > 1) {
+        // Grouped case requires input features aligned to the imad width (4).
+        if (params.weights.IFM().v % 4 != 0)
+            return false;
+        // Output features must align to the sub-group size.
+        // Consider loosening at the cost of performance.
+        if (params.weights.OFM().v % simd != 0)
+            return false;
+    }
+
+    return true;
+}
+
+// Maps the chosen (ifm tile, ofm tile * simd) pair to the matching blocked weights layout.
+WeightsLayout DeconvolutionKernel_imad_along_f_tile_bfx::GetPreferredWeightsLayout(const deconvolution_params& params) const {
+ // isv, osv
+ using layout_map_key = std::tuple<size_t, size_t>;
+ using layout_map = std::map<layout_map_key, WeightsLayout>;
+
+ // Keyed by (inner slice of input features, inner slice of output features).
+ layout_map lt_map = {
+ {layout_map_key((size_t)4, (size_t)16), WeightsLayout::g_os_zyx_is_osv16_isv4 },
+ {layout_map_key((size_t)16, (size_t)16), WeightsLayout::g_os_zyx_is_osv16_isv16 },
+ {layout_map_key((size_t)32, (size_t)16), WeightsLayout::g_os_zyx_is_osv16_isv32 },
+ {layout_map_key((size_t)4, (size_t)32), WeightsLayout::g_os_zyx_is_osv32_isv4 },
+ {layout_map_key((size_t)16, (size_t)32), WeightsLayout::g_os_zyx_is_osv32_isv16 },
+ {layout_map_key((size_t)32, (size_t)32), WeightsLayout::g_os_zyx_is_osv32_isv32 }};
+
+ auto tile_ifm = GetTileIFM(params);
+ auto tile_ofm_simd = GetTileOFM(params) * simd;
+ auto key = layout_map_key(tile_ifm, tile_ofm_simd);
+ auto it = lt_map.find(key);
+ if (it == lt_map.end()) {
+ // Params are not valid for this implementation, return anything to allow Validate to reject
+ return WeightsLayout::goizyx;
+ }
+ auto layout = it->second;
+ return layout;
+}
+
+// Builds the dispatch: x is split into tiles and flattened with y/z, features are
+// split by the ofm tile and aligned to the sub-group size, batch is split by the batch tile.
+DeconvolutionKernelBase::DispatchData DeconvolutionKernel_imad_along_f_tile_bfx::SetDefault(const deconvolution_params& params) const {
+    auto dispatch = Parent::SetDefault(params);
+
+    const auto& out = params.output;
+
+    dispatch.gws0 = CeilDiv(out.X().v, GetTileX(params)) * out.Y().v * out.Z().v;
+    dispatch.gws1 = Align(CeilDiv(out.Feature().v, GetTileOFM(params)), simd);
+    dispatch.gws2 = CeilDiv(out.Batch().v, GetTileB(params));
+
+    // Exactly one sub-group per work-group.
+    dispatch.lws0 = 1;
+    dispatch.lws1 = simd;
+    dispatch.lws2 = 1;
+
+    // Currently most optimized for fsv16 formats
+    auto in_layout = params.inputs[0].GetLayout();
+    if (in_layout == DataLayout::b_fs_yx_fsv16 || in_layout == DataLayout::b_fs_zyx_fsv16) {
+        dispatch.efficiency = FORCE_PRIORITY_7;
+    } else {
+        dispatch.efficiency = FORCE_PRIORITY_8;
+    }
+
+    return dispatch;
+}
+
+// Adds tiling factors, layout-dependent input pitches and the fused-ops configuration.
+JitConstants DeconvolutionKernel_imad_along_f_tile_bfx::GetJitConstants(const deconvolution_params& params) const {
+    auto jit = Parent::GetJitConstants(params);
+    auto tile_ifm = GetTileIFM(params);
+    auto tile_x = GetTileX(params);
+    auto tile_ofm = GetTileOFM(params);
+    auto tile_b = GetTileB(params);
+
+    jit.AddConstant(MakeJitConstant("TILE_IFM", tile_ifm));
+    jit.AddConstant(MakeJitConstant("TILE_X", tile_x));
+    jit.AddConstant(MakeJitConstant("TILE_OFM", tile_ofm));
+    jit.AddConstant(MakeJitConstant("TILE_B", tile_b));
+    jit.AddConstant(MakeJitConstant("SIMD", simd));
+
+    auto& in = params.inputs[0];
+    auto in_layout = in.GetLayout();
+
+    // Layout specific params:
+    //  - input_tile_ifm_pitch: pitch between consecutive ifm tiles (0 => tiles not contiguous)
+    //  - input_in_tile_batch_pitch: pitch between consecutive batches inside a batch tile
+    size_t input_tile_ifm_pitch = 0;
+    size_t input_in_tile_batch_pitch = 0;
+    size_t zyx_pitch_factor = in.Z().LogicalDimPadded() * in.Y().LogicalDimPadded() * in.X().LogicalDimPadded();
+
+    if (in_layout == DataLayout::b_fs_yx_fsv16 || in_layout == DataLayout::b_fs_zyx_fsv16) {
+        if (tile_ifm == 16) {
+            input_tile_ifm_pitch = zyx_pitch_factor * 16;
+        }
+        input_in_tile_batch_pitch = Align(in.Feature().LogicalDimPadded(), 16) * zyx_pitch_factor;
+    } else if (in_layout == DataLayout::b_fs_yx_fsv32 || in_layout == DataLayout::b_fs_zyx_fsv32) {
+        // Bugfix: the second condition previously re-checked b_fs_yx_fsv32 (copy-paste),
+        // so b_fs_zyx_fsv32 inputs fell through with zero pitches despite being supported.
+        if (tile_ifm == 32) {
+            input_tile_ifm_pitch = zyx_pitch_factor * 32;
+        }
+        input_in_tile_batch_pitch = Align(in.Feature().LogicalDimPadded(), 32) * zyx_pitch_factor;
+    } else if (in_layout == DataLayout::bs_fs_yx_bsv16_fsv16 || in_layout == DataLayout::bs_fs_zyx_bsv16_fsv16) {
+        if (tile_ifm == 16) {
+            input_tile_ifm_pitch = zyx_pitch_factor * 16 * 16;
+        }
+        input_in_tile_batch_pitch = 16;
+    } else if (in_layout == DataLayout::byxf_af32) {
+        input_tile_ifm_pitch = tile_ifm;
+        input_in_tile_batch_pitch = zyx_pitch_factor * Align(in.Feature().LogicalDimPadded(), 32);
+    }
+
+    jit.AddConstant(MakeJitConstant("INPUT_VALID_TILE_IFM_PITCH", input_tile_ifm_pitch != 0));
+    jit.AddConstant(MakeJitConstant("INPUT_TILE_IFM_PITCH", input_tile_ifm_pitch));
+    jit.AddConstant(MakeJitConstant("INPUT_IN_TILE_B_PITCH", input_in_tile_batch_pitch));
+
+    // fsv16 outputs can use blocked stores along x; other layouts store element-wise.
+    if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv16 || params.output.GetLayout() == DataLayout::b_fs_zyx_fsv16) {
+        jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_X_STORE", true));
+    } else {
+        jit.AddConstant(MakeJitConstant("OUTPUT_NAIVE_STORE", true));
+    }
+
+    if (!params.fused_ops.empty()) {
+        auto fused_in_dt = GetActivationType(params);
+        // Index order must match the variable names used in the kernel code.
+        std::vector<std::string> idx_order;
+        if (params.output.Dimentions() <= 4) {
+            idx_order = { "(out_b + ob)", "(out_f + of * SIMD)", "out_y", "(out_x + tx)" };
+        } else {
+            idx_order = { "(out_b + ob)", "(out_f + of * SIMD)", "out_z", "out_y", "(out_x + tx)" };
+        }
+        // Boundary checks are needed only when a tile may cross the output extent.
+        auto boundary_check = BoundaryCheck::DISABLED;
+        if (params.output.X().v % tile_x != 0
+            || params.output.Feature().v % (tile_ofm * simd) != 0
+            || params.output.Batch().v % tile_b != 0) {
+            boundary_check = BoundaryCheck::ENABLED;
+        }
+        std::vector<Tensor::DataChannelName> loop_axes = { Tensor::DataChannelName::X };
+        if (tile_b != 1) {
+            loop_axes.push_back(Tensor::DataChannelName::BATCH);
+        } else {
+            idx_order[0] = "out_b";
+        }
+
+        auto conf = FusedOpsConfiguration{ "",
+                                           idx_order,
+                                           "dequantized[ob][of][tx]",
+                                           fused_in_dt,
+                                           1,
+                                           LoadType::LT_UNALIGNED,
+                                           boundary_check,
+                                           IndexType::TENSOR_COORD,
+                                           Tensor::DataChannelName::X,
+                                           loop_axes,
+                                           true };
+
+        jit.Merge(MakeFusedOpsJitConstants(params, { conf }));
+    }
+
+    return jit;
+}
+
+// Picks the input-feature tile: the largest of {4, 16, 32} that fits the layout's
+// feature-vector size and, for grouped convolution, evenly divides the ifm count.
+size_t DeconvolutionKernel_imad_along_f_tile_bfx::GetTileIFM(const deconvolution_params& params) const {
+    auto layout = params.inputs[0].GetLayout();
+
+    // Feature-vector size implied by the input layout.
+    size_t fsv = 4;
+    if (layout == DataLayout::b_fs_yx_fsv16
+        || layout == DataLayout::b_fs_zyx_fsv16
+        || layout == DataLayout::bs_fs_yx_bsv16_fsv16
+        || layout == DataLayout::bs_fs_zyx_bsv16_fsv16) {
+        fsv = 16;
+    } else if (layout == DataLayout::b_fs_yx_fsv32
+               || layout == DataLayout::b_fs_zyx_fsv32
+               || layout == DataLayout::byxf_af32) {
+        fsv = 32;
+    }
+
+    auto ifm = params.weights.IFM().v;
+    bool grouped = params.groups > 1;
+    auto pref_tile_ifm = std::min(fsv, ifm);
+
+    // Try candidates from largest to smallest; fall back to 1 if none fits.
+    for (size_t candidate : { (size_t)32, (size_t)16, (size_t)4 }) {
+        if (candidate <= pref_tile_ifm && (!grouped || ifm % candidate == 0))
+            return candidate;
+    }
+    return 1;
+}
+
+// Number of output-feature slices (of simd features each) processed per work-item.
+size_t DeconvolutionKernel_imad_along_f_tile_bfx::GetTileOFM(const deconvolution_params& params) const {
+    // TODO Loosen divisibility requirement for tile ofm 2
+    // NOTE(review): ofm tile of 2 is only enabled when batch is odd, i.e. exactly when
+    // GetTileB's batch tiling is unused -- presumably to bound per-work-item register
+    // usage; confirm this interaction is intentional.
+    if (params.weights.OFM().v % (simd * 2) == 0 && params.output.Batch().v % 2 != 0)
+        return 2;
+
+    return 1;
+}
+
+// Tile along x: one element per sub-group lane, capped by the output width.
+size_t DeconvolutionKernel_imad_along_f_tile_bfx::GetTileX(const deconvolution_params& params) const {
+    constexpr size_t max_tile_x = simd;
+    return std::min(params.output.X().v, max_tile_x);
+}
+
+// Tile along batch: process two batches per work-item when batch is even.
+size_t DeconvolutionKernel_imad_along_f_tile_bfx::GetTileB(const deconvolution_params& params) const {
+    return (params.output.Batch().v % 2 == 0) ? 2 : 1;
+}
+
+} // namespace kernel_selector
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "deconvolution_kernel_base.h"
+#include <vector>
+
+namespace kernel_selector {
+
+// Deconvolution kernel using imad instructions, tiling the output along batch,
+// features and x. See the .cpp for tile-size selection heuristics.
+class DeconvolutionKernel_imad_along_f_tile_bfx : public DeconvolutionKernelBase {
+public:
+    using Parent = DeconvolutionKernelBase;
+    DeconvolutionKernel_imad_along_f_tile_bfx() : DeconvolutionKernelBase("deconvolution_gpu_imad_along_f_tile_bfx") {}
+    virtual ~DeconvolutionKernel_imad_along_f_tile_bfx() = default;
+
+    ParamsKey GetSupportedKey() const override;
+
+protected:
+    bool Validate(const Params& p, const optional_params& o) const override;
+    // Fixed garbled parameter declaration ("¶ms", a mangled "&params").
+    WeightsLayout GetPreferredWeightsLayout(const deconvolution_params& params) const override;
+    CommonDispatchData SetDefault(const deconvolution_params& params) const override;
+    JitConstants GetJitConstants(const deconvolution_params& params) const override;
+
+    // Fused primitives this kernel can inline into its epilogue.
+    std::vector<FusedOpType> GetSupportedFusedOps() const override {
+        return {
+            FusedOpType::ACTIVATION,
+            FusedOpType::ELTWISE,
+            FusedOpType::SCALE,
+            FusedOpType::QUANTIZE
+        };
+    }
+
+    // Tile-size selection helpers.
+    size_t GetTileIFM(const deconvolution_params& params) const;
+    size_t GetTileOFM(const deconvolution_params& params) const;
+    size_t GetTileX(const deconvolution_params& params) const;
+    size_t GetTileB(const deconvolution_params& params) const;
+};
+
+} // namespace kernel_selector
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "deconvolution_kernel_imad_ref.hpp"
+
+#include "kernel_selector_utils.h"
+
+#include <vector>
+#include <string>
+
+namespace kernel_selector {
+
+// Describes the data types, layouts and features supported by the reference imad kernel.
+ParamsKey DeconvolutionKernel_imad_ref::GetSupportedKey() const {
+ ParamsKey k;
+ // imad requires quantized 8-bit inputs and weights; output may be dequantized.
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+ k.EnableInputWeightsType(WeightsType::INT8);
+ k.EnableInputWeightsType(WeightsType::UINT8);
+
+ // Feature-blocked input layouts; output layout is unrestricted for the reference path.
+ k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
+ k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
+ k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
+ k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
+ k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
+ k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
+ k.EnableInputLayout(DataLayout::b_fs_zyx_fsv32);
+ k.EnableInputLayout(DataLayout::byxf_af32);
+ k.EnableAllOutputLayout();
+
+ k.EnableDifferentTypes();
+ k.EnableDifferentInputWeightsTypes();
+ k.EnableBatching();
+ k.EnableTensorPitches();
+ k.EnableTensorOffset();
+ k.EnableBiasPerFeature();
+ k.EnableNonBiasTerm();
+ k.EnableGroupedConvolution();
+
+ return k;
+}
+
+// Single supported weights layout, matching the imad tile of 4 input features (isv4).
+WeightsLayout DeconvolutionKernel_imad_ref::GetPreferredWeightsLayout(const deconvolution_params&) const {
+ return WeightsLayout::g_os_zyx_is_osv32_isv4;
+}
+
+// Naive dispatch for the reference kernel: one work-item per output element,
+// gws = (features, x*y*z, batch).
+DeconvolutionKernelBase::DispatchData DeconvolutionKernel_imad_ref::SetDefault(const deconvolution_params& params) const {
+    auto dispatch = Parent::SetDefault(params);
+
+    const auto& out = params.output;
+    std::vector<size_t> global = {
+        out.Feature().v,
+        out.X().v * out.Y().v * out.Z().v,
+        out.Batch().v
+    };
+    // Let the common helper choose a local size fitting the device limits.
+    auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+
+    dispatch.gws0 = global[0];
+    dispatch.gws1 = global[1];
+    dispatch.gws2 = global[2];
+    dispatch.lws0 = local[0];
+    dispatch.lws1 = local[1];
+    dispatch.lws2 = local[2];
+
+    // Reference implementation: lowest selection priority.
+    dispatch.efficiency = FORCE_PRIORITY_9;
+
+    return dispatch;
+}
+
+// Adds the ifm tile size and the fused-ops configuration for the reference kernel.
+JitConstants DeconvolutionKernel_imad_ref::GetJitConstants(const deconvolution_params& params) const {
+ auto jit = Parent::GetJitConstants(params);
+ auto tile_ifm = GetTileIFM(params);
+
+ jit.AddConstant(MakeJitConstant("TILE_IFM", tile_ifm));
+
+ if (!params.fused_ops.empty()) {
+ // Index order must match the index variables used in the kernel code.
+ std::vector<std::string> idx_order;
+ if (params.output.Dimentions() <= 4) {
+ idx_order = { "out_b", "out_f", "out_y", "out_x" };
+ } else {
+ idx_order = { "out_b", "out_f", "out_z", "out_y", "out_x" };
+ }
+ // One element per work-item, so unaligned loads with no boundary checks suffice.
+ auto conf = FusedOpsConfiguration{ "", idx_order, "dequantized", GetActivationType(params), 1, LoadType::LT_UNALIGNED, BoundaryCheck::DISABLED };
+ jit.Merge(MakeFusedOpsJitConstants(params, { conf }));
+ }
+
+ return jit;
+}
+
+// imad processes 4 input features per instruction; the reference kernel always tiles by 4.
+size_t DeconvolutionKernel_imad_ref::GetTileIFM(const deconvolution_params&) const {
+ return 4;
+}
+
+
+} // namespace kernel_selector
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "deconvolution_kernel_base.h"
+#include <vector>
+
+namespace kernel_selector {
+
+// Reference (unoptimized) deconvolution kernel using imad instructions.
+// One work-item per output element; used as a fallback for quantized inputs.
+class DeconvolutionKernel_imad_ref : public DeconvolutionKernelBase {
+public:
+    using Parent = DeconvolutionKernelBase;
+    DeconvolutionKernel_imad_ref() : DeconvolutionKernelBase("deconvolution_gpu_imad_ref") {}
+    virtual ~DeconvolutionKernel_imad_ref() = default;
+
+    ParamsKey GetSupportedKey() const override;
+
+protected:
+    // Fixed garbled parameter declaration ("¶ms", a mangled "&params").
+    WeightsLayout GetPreferredWeightsLayout(const deconvolution_params& params) const override;
+    CommonDispatchData SetDefault(const deconvolution_params& params) const override;
+    JitConstants GetJitConstants(const deconvolution_params& params) const override;
+
+    // Fused primitives this kernel can inline into its epilogue.
+    std::vector<FusedOpType> GetSupportedFusedOps() const override {
+        return {
+            FusedOpType::ACTIVATION,
+            FusedOpType::ELTWISE,
+            FusedOpType::SCALE,
+            FusedOpType::QUANTIZE
+        };
+    }
+
+    size_t GetTileIFM(const deconvolution_params& params) const;
+};
+
+} // namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+
k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
+ k.EnableInputWeightsType(WeightsType::INT8);
+ k.EnableInputWeightsType(WeightsType::UINT8);
+
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+
k.EnableInputLayout(DataLayout::yxfb);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::byxf);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
+
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableDepthwiseSeparableOpt();
k.EnableGradient();
k.EnableGroupedConvolution();
+ k.EnableDifferentTypes();
+ k.EnableDifferentInputWeightsTypes();
return k;
}
if (params.output.Feature().v * params.output.Batch().v <= 16)
jit.AddConstant(MakeJitConstant("DIM_ORDER_XYBF", 1));
+ if (!params.fused_ops.empty()) {
+ auto fused_dt = GetActivationType(params);
+ std::vector<std::string> idx_order;
+ if (params.output.Dimentions() <= 4) {
+ idx_order = { "batch_offset", "ofm_offset", "out_y", "out_x" };
+ } else {
+ idx_order = { "batch_offset", "ofm_offset", "out_z", "out_y", "out_x" };
+ }
+ FusedOpsConfiguration conf = { "", idx_order, "post_activation", fused_dt, 1, LoadType::LT_UNALIGNED, BoundaryCheck::DISABLED };
+
+ jit.Merge(MakeFusedOpsJitConstants(params, { conf }));
+ }
+
return jit;
}
} // namespace kernel_selector
protected:
CommonDispatchData SetDefault(const deconvolution_params& params) const override;
JitConstants GetJitConstants(const deconvolution_params& params) const override;
+
+ std::vector<FusedOpType> GetSupportedFusedOps() const override {
+ return {
+ FusedOpType::ACTIVATION,
+ FusedOpType::ELTWISE,
+ FusedOpType::SCALE,
+ FusedOpType::QUANTIZE
+ };
+ }
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
#include "deconvolution_kernel_bfyx_opt.h"
#include "deconvolution_kernel_b_fs_zyx_fsv16.h"
#include "deconvolution_kernel_b_fs_zyx_fsv16_dw.h"
+#include "deconvolution_kernel_imad_ref.hpp"
+#include "deconvolution_kernel_imad_along_f_tile_bfx.hpp"
namespace kernel_selector {
deconvolution_kernel_selector::deconvolution_kernel_selector() {
Attach<DeconvolutionKernel_bfyx_opt>();
Attach<DeconvolutionKernel_b_fs_zyx_fsv16>();
Attach<DeconvolutionKernel_b_fs_zyx_fsv16_dw>();
+ Attach<DeconvolutionKernel_imad_ref>();
+ Attach<DeconvolutionKernel_imad_along_f_tile_bfx>();
}
KernelsData deconvolution_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
auto input0 = ewParams.inputs[0];
+ // Check that padding before features doesn't miss-align the blocks
+ auto feature_block_size = 16;
+ if (input0.Feature().pad.before % feature_block_size != 0 || output.Feature().pad.before % feature_block_size != 0) {
+ return false;
+ }
+
for (size_t i = 1; i < ewParams.inputs.size(); i++) {
if (ewParams.inputs[i].LogicalSize() == input0.LogicalSize() && !(ewParams.inputs[i] == input0))
return false;
+ if (ewParams.inputs[i].Feature().pad.before % feature_block_size != 0) {
+ return false;
+ }
}
return true;
return false;
}
+ const auto& params = static_cast<const pooling_params&>(p);
+ const auto feature_block_size = 16;
+
+ // Check that padding features doesn't miss-align the blocks
+ if (params.inputs[0].Feature().pad.before % feature_block_size != 0 || params.output.Feature().pad.before % feature_block_size != 0)
+ return false;
+
return true;
}
bool has_post_scale,
bool has_post_shift,
bool has_pre_shift,
+ bool has_clamp,
bool per_tensor_input_range,
bool per_tensor_input_scale,
bool per_tensor_input_shift,
, has_post_scale(has_post_scale)
, has_post_shift(has_post_shift)
, has_pre_shift(has_pre_shift)
+ , has_clamp(has_clamp)
, per_tensor_input_range(per_tensor_input_range)
, per_tensor_input_scale(per_tensor_input_scale)
, per_tensor_input_shift(per_tensor_input_shift)
bool has_post_scale;
bool has_post_shift;
bool has_pre_shift;
+ bool has_clamp;
bool per_tensor_input_range;
bool per_tensor_input_scale;
MakeJitConstant("OUTPUT", output),
};
+ if (params.rotate_180) {
+ jit.AddConstant(MakeJitConstant("REORDER_ROTATE", params.rotate_180));
+ }
+
if (fp16Supported) {
jit.Merge(MakeUnitTypeJitConstants(Datatype::F16));
} else {
WeightsTensor input;
WeightsTensor output;
bool winograd = false;
+ bool rotate_180 = false;
virtual ParamsKey GetParamsKey() const {
ParamsKey k;
if (winograd) {
k.EnableWinogradReorder();
}
+
+ if (rotate_180) {
+ k.EnableRotateReorder();
+ }
return k;
}
};
k.EnableDifferentTypes();
k.EnableTensorOffset();
k.EnableTensorPitches();
+ k.EnableRotateReorder();
return k;
}
const reorder_weights_params& orgParams = static_cast<const reorder_weights_params&>(params);
return GetCommonKernelsData(orgParams, options, DONT_USE_IF_HAVE_SOMETHING_ELSE);
}
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
#include "include/data_types.cl"
KERNEL(activation)(
-#if GRADIENT
- __global UNIT_TYPE* input_grad,
- __global UNIT_TYPE* output,
- __global UNIT_TYPE* input
-#else
- __global UNIT_TYPE* input,
- __global UNIT_TYPE* output
+ __global INPUT0_TYPE* input,
+ __global OUTPUT_TYPE* output
+#if HAS_FUSED_OPS_DECLS
+ , FUSED_OPS_DECLS
#endif
)
{
const unsigned int x = (uint)get_global_id(0) * NUM_COLS_WI;
+#if OUTPUT_DIMS == 5
+ const unsigned int fo_x = x % OUTPUT_SIZE_X;
+ const unsigned int fo_y = x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y;
+ const unsigned int fo_z = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z;
+ const unsigned int fo_f = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_FEATURE_NUM;
+ const unsigned int fo_b = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z* OUTPUT_FEATURE_NUM);
+#elif OUTPUT_DIMS == 4
+ const unsigned int fo_x = x % OUTPUT_SIZE_X;
+ const unsigned int fo_y = x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y;
+ const unsigned int fo_f = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_FEATURE_NUM;
+ const unsigned int fo_b = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_FEATURE_NUM);
+#endif
unsigned int input_offset = x + INPUT0_OFFSET;
unsigned int output_offset = x + OUTPUT_OFFSET;
- typedef CAT(UNIT_TYPE, 4) type_t;
-#if GRADIENT
- type_t g = ((__global type_t*) (input_grad + input_offset))[0];
-#endif
- type_t v = ((__global type_t*) (input + input_offset))[0];
+ typedef CAT(INPUT0_TYPE, 4) input_t;
+ typedef CAT(OUTPUT_TYPE, 4) output_t;
-#if GRADIENT
- v = ACTIVATION(g, v, ACTIVATION_PARAMS);
+ input_t v = ((__global input_t*) (input + input_offset))[0];
+
+ v = ACTIVATION_KERNEL(v, ACTIVATION_PARAMS_KERNEL);
+#if HAS_FUSED_OPS
+ FUSED_OPS;
+ *((__global output_t*)(output + output_offset)) = FUSED_OPS_RESULT;
#else
- v = ACTIVATION(v, ACTIVATION_PARAMS);
+ *((__global output_t*)(output + output_offset)) = v;
#endif
-
- *((__global type_t*)(output + output_offset)) = v;
}
// TODO: move it from layout based to memory based
KERNEL(activation)(
-#if GRADIENT
- __global UNIT_TYPE* input_grad,
- __global UNIT_TYPE* output_grad,
- __global UNIT_TYPE* input
-#else
- __global UNIT_TYPE* input,
- __global UNIT_TYPE* output
-#endif
+ __global INPUT0_TYPE* input,
+ __global OUTPUT_TYPE* output
#ifdef PARAMETERIZED
, __global ADDITIONAL_PARAMS_TYPE* params
#endif
+#if HAS_FUSED_OPS_DECLS
+ , FUSED_OPS_DECLS
+#endif
)
{
#if OUTPUT_DIMS == 5
#define ORDER batch,feature,y,x
#endif
-#if defined OUTPUT_LAYOUT_BFZYX
+#if OUTPUT_DIMS == 5
const unsigned x = get_global_id(0);
const uint y = (uint)get_global_id(1) % OUTPUT_SIZE_Y;
const uint z = (uint)get_global_id(1) / OUTPUT_SIZE_Y;
#endif
#endif
-#if GRADIENT
- const unsigned src_grad_index = GET_INDEX(INPUT,0,ORDER);
- const unsigned src_index = GET_INDEX(INPUT,1,ORDER);
-#else
const unsigned src_index = GET_INDEX(INPUT,0,ORDER);
-#endif
const unsigned dst_index = GET_INDEX(OUTPUT,,ORDER);
#if defined PARAMETERIZED
#endif
#define PARAMETERIZED_ACTIVATION_PARAMS NL_M_PARAMETERIZED, NL_N_PARAMETERIZED
- #if GRADIENT
- output_grad[dst_index] = ACTIVATION(input_grad[src_grad_index], input[src_index], PARAMETERIZED_ACTIVATION_PARAMS);
+ INPUT0_TYPE dst = ACTIVATION_KERNEL(input[src_index], PARAMETERIZED_ACTIVATION_PARAMS);
+ #if HAS_FUSED_OPS
+ FUSED_OPS;
+ output[dst_index] = FUSED_OPS_RESULT;
#else
- output[dst_index] = ACTIVATION(input[src_index], PARAMETERIZED_ACTIVATION_PARAMS);
+ output[dst_index] = dst;
#endif
#else
- #if GRADIENT
- output_grad[dst_index] = ACTIVATION(input_grad[src_grad_index], input[src_index], ACTIVATION_PARAMS);
+ INPUT0_TYPE dst = ACTIVATION_KERNEL(input[src_index], ACTIVATION_PARAMS);
+ #if HAS_FUSED_OPS
+ FUSED_OPS;
+ output[dst_index] = FUSED_OPS_RESULT;
#else
- output[dst_index] = ACTIVATION(input[src_index], ACTIVATION_PARAMS);
+ output[dst_index] = dst;
#endif
#endif
}
__attribute__((opencl_unroll_hint(INPUT_BLOCK_SIZE)))
for (int i = 0; i < INPUT_BLOCK_SIZE; i++)
{
- const uint in_elem = i * SUB_GROUP_SIZE + lid;
- const uint xb = in_elem % INPUT_LINE_SIZE;
- const uint yb = in_elem / INPUT_LINE_SIZE;
+ const int in_elem = i * SUB_GROUP_SIZE + lid;
+ const int xb = in_elem % INPUT_LINE_SIZE;
+ const int yb = in_elem / INPUT_LINE_SIZE;
if (input_y + yb >= 0 && input_y + yb < INPUT0_SIZE_Y &&
input_x + xb >= 0 && input_x + xb < INPUT0_SIZE_X)
line_cache[ic * INPUT_BLOCK_SIZE + i] = input[input_offset +
#else // OUTPUT_FEATURE_NUM > 16
const uint dst_index = OUTPUT_GET_INDEX(b, fg*OSV + lid, y, x+i);
if (x + i < OUTPUT_SIZE_X && fg*OSV + lid < OUTPUT_FEATURE_NUM) {
- output[dst_index] = dst[ofm][i];
+ output[dst_index] = dst[0][i];
}
#endif // OUTPUT_FEATURE_NUM > 16
}
*******************************************************************************/
#include "include/include_all.cl"
-#include "include/unit_type.cl"
+#include "include/data_types.cl"
+
+#include "deconvolution_gpu_imad_common.cl"
#define unroll_for __attribute__((opencl_unroll_hint)) for
#define FEATURE_SLICE_SIZE 16
+
#if X_BLOCK_SIZE == 1
- #define BLOCK_TYPE UNIT_TYPE
- #define DST_VAR dst
+ #define GET_VEC_ELEM(var, idx) var
+#else
+ #define GET_VEC_ELEM(var, idx) var[idx]
+#endif
+
+#define ACCUMULATOR_BLOCK_TYPE MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, X_BLOCK_SIZE)
+#define ACTIVATION_BLOCK_TYPE MAKE_VECTOR_TYPE(ACTIVATION_TYPE, X_BLOCK_SIZE)
+#define OUTPUT_BLOCK_TYPE MAKE_VECTOR_TYPE(OUTPUT_TYPE, X_BLOCK_SIZE)
+
+#define TO_ACTIVATION_BLOCK_TYPE(x) CAT(convert_, ACTIVATION_BLOCK_TYPE)(x)
+#define TO_OUTPUT_BLOCK_TYPE(x) CAT(convert_, OUTPUT_BLOCK_TYPE)(x)
+
+// Declare the weights-loading helper, sized by the filter element width so
+// each sub-group block read moves roughly the same number of bytes:
+// 1-byte types read 16 elements per call, 2-byte types 8, wider types 4.
+#if FILTER_TYPE_SIZE == 1
+DECLARE_READ_BLOCK_16(load_weights, FILTER_TYPE)
+#elif FILTER_TYPE_SIZE == 2
+DECLARE_READ_BLOCK_8(load_weights, FILTER_TYPE)
+#else
+DECLARE_READ_BLOCK_4(load_weights, FILTER_TYPE)
+#endif
+
+#if OUTPUT_TYPE_SIZE == 1
+DECLARE_STORE_BLOCK_16(store_output, OUTPUT_TYPE)
#else
- #define BLOCK_TYPE CAT(UNIT_TYPE, X_BLOCK_SIZE)
- #define DST_VAR dst[x_block]
+DECLARE_STORE_BLOCK_8(store_output, OUTPUT_TYPE)
+#endif
+
+// Declare the input-line preloading helper, sized by the input element width
+// (1-byte types use 16-element blocks, wider types 8-element blocks),
+// mirroring the PRELOAD_WEIGHTS_LINE selection below.
+#if PRELOAD_INPUT_LINE
+// Fix: compare against 1 explicitly.  A bare `#if INPUT0_TYPE_SIZE` is true
+// for every non-zero size, which made the 8-element branch unreachable and
+// forced 16-element block reads even for 2- and 4-byte input types.
+# if INPUT0_TYPE_SIZE == 1
+DECLARE_READ_BLOCK_16(preload_input, INPUT0_TYPE)
+# else
+DECLARE_READ_BLOCK_8(preload_input, INPUT0_TYPE)
+# endif
+#endif
+
+// Optional per-filter-row weights preloading helper; like load_weights above,
+// 1-byte filter types use 16-element block reads, wider types 8-element ones.
+#if PRELOAD_WEIGHTS_LINE
+# if FILTER_TYPE_SIZE == 1
+DECLARE_READ_BLOCK_16(preload_weights, FILTER_TYPE)
+# else
+DECLARE_READ_BLOCK_8(preload_weights, FILTER_TYPE)
+# endif
#endif
__attribute__((intel_reqd_sub_group_size(FEATURE_SLICE_SIZE))) // attr:no-format
+__attribute__((reqd_work_group_size(1, FEATURE_SLICE_SIZE, 1)))
KERNEL(deconvolution_gpu_b_fs_zyx_fsv16_dw)(
const __global INPUT0_TYPE *input,
__global OUTPUT_TYPE *output,
#if BIAS_TERM
const __global BIAS_TYPE *bias,
#endif
- uint split_idx)
+#if HAS_FUSED_OPS_DECLS
+ FUSED_OPS_DECLS,
+#endif
+ uint split_idx
+ )
{
const uint zyx = (uint)get_global_id(0);
- const uint x = (zyx % (OUTPUT_SIZE_X / X_BLOCK_SIZE)) * X_BLOCK_SIZE;
-#if INPUT0_LAYOUT_B_FS_YX_FSV16
- const uint y = zyx / (OUTPUT_SIZE_X / X_BLOCK_SIZE);
+ const uint x = (zyx % (CEIL_DIV(OUTPUT_SIZE_X, X_BLOCK_SIZE))) * X_BLOCK_SIZE;
+#if OUTPUT_DIMS <= 4
+ const uint y = zyx / (CEIL_DIV(OUTPUT_SIZE_X, X_BLOCK_SIZE));
const uint z = 0;
#else
- const uint zy = zyx / (OUTPUT_SIZE_X / X_BLOCK_SIZE);
+ const uint zy = zyx / (CEIL_DIV(OUTPUT_SIZE_X, X_BLOCK_SIZE));
const uint y = zy % OUTPUT_SIZE_Y;
const uint z = zy / OUTPUT_SIZE_Y;
#endif
const uint f_block = get_group_id(1);
const uint sglid = get_sub_group_local_id();
- const uint f = f_block * FEATURE_SLICE_SIZE + sglid;
+ const uint fg = f_block * FEATURE_SLICE_SIZE;
+ const uint f = fg + sglid;
const uint b = (uint)get_global_id(2);
const int input_x = x + PADDING_SIZE_X - (FILTER_SIZE_X - 1);
const uint filter_offset = f_block * FEATURE_SLICE_SIZE * FILTER_SIZE_X * FILTER_SIZE_Y * FILTER_SIZE_Z;
-#if BIAS_TERM
- BLOCK_TYPE dst = (BLOCK_TYPE)(UNIT_BLOCK_READ(bias, f_block * FEATURE_SLICE_SIZE));
+#if BIAS_TERM && ACCUMULATOR_IS_FP
+ ACCUMULATOR_BLOCK_TYPE dst = (ACCUMULATOR_BLOCK_TYPE)(DT_BIAS_BLOCK_READ(bias, f_block * FEATURE_SLICE_SIZE));
#else
- BLOCK_TYPE dst = (BLOCK_TYPE)(UNIT_VAL_ZERO);
+ ACCUMULATOR_BLOCK_TYPE dst = (ACCUMULATOR_BLOCK_TYPE)(ACCUMULATOR_VAL_ZERO);
#endif
- UNIT_TYPE wei[FILTER_SIZE_Z * FILTER_SIZE_Y * FILTER_SIZE_X];
+#if PRELOAD_WEIGHTS
+ FILTER_TYPE wei[FILTER_SIZE_Z * FILTER_SIZE_Y * FILTER_SIZE_X];
+
+ FUNC_CALL(load_weights)(weights, filter_offset, FILTER_SIZE_X * FILTER_SIZE_Y * FILTER_SIZE_Z, wei);
+#endif
+
+ INPUT0_TYPE src_val = INPUT0_VAL_ZERO;
+
+#if PRELOAD_INPUT_LINE
+ int first_input_x = input_x;
+ if (first_input_x % STRIDE_SIZE_X != 0) {
+ if (first_input_x >= 0)
+ first_input_x = ALIGN(first_input_x, STRIDE_SIZE_X);
+ else
+ first_input_x = first_input_x / STRIDE_SIZE_X * STRIDE_SIZE_X;
+ }
+ first_input_x = first_input_x / STRIDE_SIZE_X;
unroll_for (uint k_z = 0; k_z < FILTER_SIZE_Z; k_z++) {
+ const int input_offset_z = input_z + k_z;
+ const bool zero_z = (input_offset_z >= INPUT0_SIZE_Z * STRIDE_SIZE_Z) || (input_offset_z < 0) || ((input_offset_z % STRIDE_SIZE_Z) != 0);
unroll_for (uint k_y = 0; k_y < FILTER_SIZE_Y; k_y++) {
- unroll_for (uint k_x = 0; k_x < FILTER_SIZE_X; k_x++) {
- const uint wei_idx = (FILTER_SIZE_Z - k_z - 1) * FILTER_Z_PITCH + (FILTER_SIZE_Y - k_y - 1) * FILTER_Y_PITCH + (FILTER_SIZE_X - k_x - 1);
- wei[wei_idx] = UNIT_BLOCK_READ(weights, filter_offset + k_z * FILTER_Z_PITCH * FEATURE_SLICE_SIZE
- + k_y * FILTER_Y_PITCH * FEATURE_SLICE_SIZE
- + k_x * FEATURE_SLICE_SIZE);
- }
- }
- }
+ const int input_offset_y = input_y + k_y;
+ const bool zero_y = (input_offset_y >= INPUT0_SIZE_Y * STRIDE_SIZE_Y) || (input_offset_y < 0) || ((input_offset_y % STRIDE_SIZE_Y) != 0);
+ if (!zero_y && !zero_z) {
+ INPUT0_TYPE input_line[INPUT_BLOCK_SIZE_X] = { };
+ uint fixed_input_offset_y = (uint)input_offset_y / STRIDE_SIZE_Y;
+ uint fixed_input_offset_z = (uint)input_offset_z / STRIDE_SIZE_Z;
+ uint preload_input_offset = input_offset + fixed_input_offset_z * input_z_pitch +
+ fixed_input_offset_y * input_y_pitch;
- UNIT_TYPE src_val = UNIT_VAL_ZERO;
+ if (first_input_x >= 0) {
+ FUNC_CALL(preload_input)(input, preload_input_offset + first_input_x * input_x_pitch, INPUT_BLOCK_SIZE_X, input_line);
+ } else {
+ unroll_for (uint xi = 0; xi < INPUT_BLOCK_SIZE_X; ++xi) {
+ if (first_input_x + xi >= 0) {
+ input_line[xi] = DT_INPUT_BLOCK_READ(input, preload_input_offset + first_input_x * input_x_pitch + xi * input_x_pitch);
+ } else {
+ input_line[xi] = 0;
+ }
+ }
+ }
+
+#if PRELOAD_WEIGHTS_LINE
+ FILTER_TYPE wei[FILTER_SIZE_X] = { };
+ FUNC_CALL(preload_weights)(weights,
+ filter_offset + (FILTER_SIZE_Z - k_z - 1) * FILTER_Z_PITCH * FEATURE_SLICE_SIZE
+ + (FILTER_SIZE_Y - k_y - 1) * FILTER_Y_PITCH * FEATURE_SLICE_SIZE,
+ FILTER_SIZE_X,
+ wei);
+#endif
+ unroll_for (uint k_x = 0; k_x < FILTER_SIZE_X; k_x++) {
+# if PRELOAD_WEIGHTS
+ const uint in_idx = (FILTER_SIZE_Z - k_z - 1) * FILTER_Z_PITCH + (FILTER_SIZE_Y - k_y - 1) * FILTER_Y_PITCH + (FILTER_SIZE_X - k_x - 1);
+ FILTER_TYPE wei_val = wei[in_idx];
+# elif PRELOAD_WEIGHTS_LINE
+ FILTER_TYPE wei_val = wei[(FILTER_SIZE_X - k_x - 1)];
+# else
+ const uint in_idx = (FILTER_SIZE_Z - k_z - 1) * FILTER_Z_PITCH + (FILTER_SIZE_Y - k_y - 1) * FILTER_Y_PITCH + (FILTER_SIZE_X - k_x - 1);
+ FILTER_TYPE wei_val = DT_FILTER_BLOCK_READ(weights, filter_offset + in_idx * FEATURE_SLICE_SIZE);
+# endif
+ unroll_for (uint x_block = 0; x_block < X_BLOCK_SIZE; x_block++) {
+ const int input_offset_x = input_x + k_x + x_block;
+ const bool zero_x = (input_offset_x >= INPUT0_SIZE_X * STRIDE_SIZE_X) || (input_offset_x < 0) || ((input_offset_x % STRIDE_SIZE_X) != 0);
+ if (!zero_x) {
+ src_val = input_line[(x_block + k_x) / STRIDE_SIZE_X];
+ GET_VEC_ELEM(dst, x_block) += src_val * wei_val;
+ } // if !zero_x
+ } // for X_BLOCK_SIZE
+ } // for FILTER_SIZE_X
+ } // if !zero_y && !zero_z
+ } // for FILTER_SIZE_Y
+ } // for FILTER_SIZE_Z
+#else
unroll_for (uint x_block = 0; x_block < X_BLOCK_SIZE; x_block++) {
unroll_for (uint k_z = 0; k_z < FILTER_SIZE_Z; k_z++) {
const int input_offset_z = input_z + k_z;
unroll_for (uint k_x = 0; k_x < FILTER_SIZE_X; k_x++) {
const int input_offset_x = input_x + k_x + x_block;
const bool zero_x = (input_offset_x >= INPUT0_SIZE_X * STRIDE_SIZE_X) || (input_offset_x < 0) || ((input_offset_x % STRIDE_SIZE_X) != 0);
- const uint in_idx = k_z * FILTER_Z_PITCH + k_y * FILTER_Y_PITCH + k_x;
+ const uint in_idx = (FILTER_SIZE_Z - k_z - 1) * FILTER_Z_PITCH + (FILTER_SIZE_Y - k_y - 1) * FILTER_Y_PITCH + (FILTER_SIZE_X - k_x - 1);
if (!zero_z && !zero_y && !zero_x) {
uint fixed_input_offset_x = (uint)input_offset_x / STRIDE_SIZE_X;
uint fixed_input_offset_y = (uint)input_offset_y / STRIDE_SIZE_Y;
uint fixed_input_offset_z = (uint)input_offset_z / STRIDE_SIZE_Z;
- src_val = UNIT_BLOCK_READ(input, input_offset +
- fixed_input_offset_z * input_z_pitch +
- fixed_input_offset_y * input_y_pitch +
- fixed_input_offset_x * input_x_pitch);
- DST_VAR = mad(src_val, wei[in_idx], DST_VAR);
- }
- }
- }
- }
- }
+ src_val = DT_INPUT_BLOCK_READ(input, input_offset +
+ fixed_input_offset_z * input_z_pitch +
+ fixed_input_offset_y * input_y_pitch +
+ fixed_input_offset_x * input_x_pitch);
+# if PRELOAD_WEIGHTS
+ FILTER_TYPE wei_val = wei[in_idx];
+# else
+ FILTER_TYPE wei_val = DT_FILTER_BLOCK_READ(weights, filter_offset + in_idx * FEATURE_SLICE_SIZE);
+# endif
+ GET_VEC_ELEM(dst, x_block) += src_val * wei_val;
+ } // if !zero_z && !zero_y && !zero_x
+ } // for FILTER_SIZE_X
+ } // for FILTER_SIZE_Y
+ } // for FILTER_SIZE_Z
+ } // for X_BLOCK_SIZE
+#endif
- dst = ACTIVATION(dst, ACTIVATION_PARAMS);
+ ACTIVATION_BLOCK_TYPE dequantized = TO_ACTIVATION_BLOCK_TYPE(dst);
+#if BIAS_TERM && !ACCUMULATOR_IS_FP
+ dequantized += TO_ACTIVATION_TYPE(DT_BIAS_BLOCK_READ(bias, f_block * FEATURE_SLICE_SIZE));
+#endif
+
+ OUTPUT_BLOCK_TYPE result;
+#if HAS_FUSED_OPS
+ FUSED_OPS;
+ result = FUSED_OPS_RESULT;
+#else
+ result = TO_OUTPUT_BLOCK_TYPE(ACTIVATION(dequantized, ACTIVATION_PARAMS));
+#endif
const uint output_x_pitch = FEATURE_SLICE_SIZE;
const uint output_y_pitch = output_x_pitch * (OUTPUT_PAD_BEFORE_SIZE_X + OUTPUT_SIZE_X + OUTPUT_PAD_AFTER_SIZE_X);
if ((f_block + 1) * FEATURE_SLICE_SIZE >= OUTPUT_FEATURE_NUM)
{
unroll_for (uint x_block = 0; x_block < X_BLOCK_SIZE; x_block++) {
+ if (OUTPUT_SIZE_X % X_BLOCK_SIZE != 0 && x + X_BLOCK_SIZE >= OUTPUT_SIZE_X && x_block >= OUTPUT_SIZE_X % X_BLOCK_SIZE)
+ break;
if (f_block * FEATURE_SLICE_SIZE + sglid < OUTPUT_FEATURE_NUM)
- output[output_offset + (x + x_block) * output_x_pitch + sglid] = DST_VAR;
+ output[output_offset + (x + x_block) * output_x_pitch + sglid] = GET_VEC_ELEM(result, x_block);
}
}
else
#endif // OUTPUT_LEFTOVERS
+#if OUTPUT_SIZE_X % X_BLOCK_SIZE != 0
+ if (x + X_BLOCK_SIZE >= OUTPUT_SIZE_X) {
+ FUNC_CALL(store_output)(output, output_offset + x * output_x_pitch, OUTPUT_SIZE_X % X_BLOCK_SIZE, (OUTPUT_TYPE *)&result);
+ } else
+#endif
{
- unroll_for (uint x_block = 0; x_block < X_BLOCK_SIZE; x_block++) {
- UNIT_BLOCK_WRITE(output, output_offset + (x + x_block) * output_x_pitch, DST_VAR);
- }
+ FUNC_CALL(store_output)(output, output_offset + x * output_x_pitch, X_BLOCK_SIZE, (OUTPUT_TYPE *)&result);
}
}
+
+#undef unroll_for
+#undef FEATURE_SLICE_SIZE
+
+#undef GET_VEC_ELEM
+
+#undef ACCUMULATOR_BLOCK_TYPE
+#undef ACTIVATION_BLOCK_TYPE
+#undef OUTPUT_BLOCK_TYPE
+
+#undef TO_ACTIVATION_BLOCK_TYPE
+#undef TO_OUTPUT_BLOCK_TYPE
#if BIAS_TERM
const __global BIAS_TYPE* bias,
#endif
- uint split_idx
-#if FUSED_ELTWISE
- , const __global UNIT_TYPE* fuse_input
+#if HAS_FUSED_OPS_DECLS
+ FUSED_OPS_DECLS,
#endif
- )
+ uint split_idx
+ )
{
- UNIT_TYPE result = UNIT_VAL_ZERO;
+ ACCUMULATOR_TYPE acc = ACCUMULATOR_VAL_ZERO;
const uint b_f = get_global_id(2);
const uint batch_offset = b_f / OUTPUT_FEATURE_NUM;
uint filter_idx = filter_offset + of*FILTER_IFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
for (uint h = 0; h < FILTER_OFM_NUM; h++)
{
- result = fma(input[input_idx], filter[filter_idx], result);
+ acc += TO_ACCUMULATOR_TYPE(input[input_idx]) * TO_ACCUMULATOR_TYPE(filter[filter_idx]);
filter_idx += FILTER_OFM_PITCH;
input_idx += INPUT0_FEATURE_PITCH;
}
uint filter_idx = filter_offset + of*FILTER_OFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
for (uint h = 0; h < FILTER_IFM_NUM; h++)
{
- result = fma(input[input_idx], filter[filter_idx], result);
+ acc += TO_ACCUMULATOR_TYPE(input[input_idx]) * TO_ACCUMULATOR_TYPE(filter[filter_idx]);
filter_idx += FILTER_IFM_PITCH;
input_idx += INPUT0_FEATURE_PITCH;
}
}
}
}
+
+ ACTIVATION_TYPE result = TO_ACTIVATION_TYPE(acc);
#if BIAS_TERM
result += bias[ofm_offset];
#endif
+ result = ACTIVATION(result, ACTIVATION_PARAMS);
+
const uint out_split_offset = g * OUTPUT_FEATURE_PITCH * FILTER_OFM_NUM;
const uint dst_index = OUTPUT_OFFSET + out_split_offset + batch_offset*OUTPUT_BATCH_PITCH + of*OUTPUT_FEATURE_PITCH + id_y*OUTPUT_Y_PITCH + id_x*OUTPUT_X_PITCH;
-#if FUSED_ELTWISE
- const uint fused_index = INPUT1_OFFSET + g * INPUT1_FEATURE_PITCH * FILTER_OFM_NUM + batch_offset*INPUT1_BATCH_PITCH + of*INPUT1_FEATURE_PITCH + id_y*INPUT1_Y_PITCH + id_x*INPUT1_X_PITCH;
-#if !GRADIENT
- output[dst_index] = ACTIVATION(result + fuse_input[fused_index], ACTIVATION_PARAMS);
-#else
- output[dst_index] = result + fuse_input[fused_index];
-#endif
+
+#if HAS_FUSED_OPS
+ FUSED_OPS;
+ output[dst_index] = FUSED_OPS_RESULT;
#else
- output[dst_index] = ACTIVATION(result, ACTIVATION_PARAMS);
+ output[dst_index] = TO_OUTPUT_TYPE(result);
#endif
}
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/fetch.cl"
+#include "include/imad.cl"
+#include "include/data_types.cl"
+
+#include "deconvolution_gpu_imad_common.cl"
+
+// Contiguous dword loaders: input and weights are read as packed uints so
+// four 8-bit values feed a single IMAD operand.  ("CONTINOUS" spelling is
+// fixed by the shared helper header.)
+DECLARE_LOAD_CONTINOUS_4(load_input_ui, uint)
+DECLARE_LOAD_CONTINOUS_4(load_weights_ui, uint)
+
+// Output store helper, sized by element width so each sub-group block write
+// moves roughly the same number of bytes.
+#if OUTPUT_TYPE_SIZE == 1
+DECLARE_STORE_BLOCK_16(store_output, OUTPUT_TYPE)
+#elif OUTPUT_TYPE_SIZE == 2
+DECLARE_STORE_BLOCK_8(store_output, OUTPUT_TYPE)
+#else
+DECLARE_STORE_BLOCK_4(store_output, OUTPUT_TYPE)
+#endif
+
+// 4-wide vector views used to reinterpret one packed dword as 4 elements.
+#define FILTER_TYPE4 MAKE_VECTOR_TYPE(FILTER_TYPE, 4)
+#define INPUT_TYPE4 MAKE_VECTOR_TYPE(INPUT0_TYPE, 4)
+
+#define AS_FILTER_TYPE4 CAT(as_, FILTER_TYPE4)
+#define AS_INPUT_TYPE4 CAT(as_, INPUT_TYPE4)
+
+// Weights addressing in g_os_zyx_is_osv_isv layout: OSV = SIMD * TILE_OFM
+// outputs and ISV = TILE_IFM inputs are interleaved per block, so the
+// pitches below advance by whole (osv, isv) sub-blocks.
+#define WEIGHTS_GET_INDEX(g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(FILTER, g, o, i, z, y, x, (SIMD * TILE_OFM), TILE_IFM)
+#define WEIGHTS_TILE_IFM_PITCH (TILE_IFM * SIMD * TILE_OFM)
+#define WEIGHTS_IN_TILE_OFM_PITCH (TILE_IFM * SIMD)
+
+// IMAD-based reference deconvolution (transposed convolution) kernel for
+// dword-packed low-precision data.
+// Tiling, as implied by the index math below: global dim 0 walks x-tiles,
+// then y (and z for 5D); dim 1 selects a slice of SIMD * TILE_OFM output
+// features; dim 2 selects a tile of TILE_B batches.  One sub-group of SIMD
+// work items cooperates, broadcasting input dwords via sub-group shuffles.
+__attribute__((reqd_work_group_size(1, SIMD, 1)))
+__attribute__((intel_reqd_sub_group_size(SIMD)))
+KERNEL(deconvolution_gpu_imad_ref)(
+    const __global INPUT0_TYPE* input,
+    __global OUTPUT_TYPE* restrict output,
+    const __global FILTER_TYPE* weights,
+#if BIAS_TERM
+    const __global BIAS_TYPE* bias,
+#endif
+#if HAS_FUSED_OPS_DECLS
+    FUSED_OPS_DECLS,
+#endif
+    uint split_idx
+) {
+    // View input/weights as uint so four 1-byte values travel as one dword
+    // into IMAD.  Assumes the offsets used below are 4-byte aligned — the
+    // chosen layouts should guarantee this; confirm against the selector.
+    const __global uint* input_ui = (const __global uint*)input;
+    const __global uint* weights_ui = (const __global uint*)weights;
+
+    uint out_b = get_global_id(2) * TILE_B;
+    uint out_fg = get_group_id(1) * SIMD * TILE_OFM;
+    uint out_f = out_fg + get_sub_group_local_id();
+    // x offset inside the row; the modulo keeps it within the x-tile grid
+    // padded up to a multiple of TILE_X.
+    uint out_x = (uint)get_global_id(0) * TILE_X % ALIGN(OUTPUT_SIZE_X, TILE_X);
+#if OUTPUT_DIMS <= 4
+    uint out_y = (uint)get_global_id(0) / CEIL_DIV(OUTPUT_SIZE_X, TILE_X);
+    uint out_z = 0;
+#elif OUTPUT_DIMS == 5
+    uint out_y = (uint)get_global_id(0) / CEIL_DIV(OUTPUT_SIZE_X, TILE_X) % OUTPUT_SIZE_Y;
+    uint out_z = (uint)get_global_id(0) / (CEIL_DIV(OUTPUT_SIZE_X, TILE_X) * OUTPUT_SIZE_Y);
+#endif
+    const uint sglid = get_sub_group_local_id();
+
+#if GROUPED
+    // Grouped deconvolution: split the flat feature slice into (group, ofm).
+    uint group = out_fg / FILTER_OFM_NUM;
+    uint ofm = out_fg % FILTER_OFM_NUM + sglid;
+#else
+    uint group = 0;
+    uint ofm = out_f;
+#endif
+    uint if_start = group * FILTER_IFM_NUM;
+
+    // Top-left input coordinate contributing to this output position; the
+    // filter is applied mirrored over the (conceptually stride-upsampled)
+    // input, hence the "- FILTER_SIZE + 1" term.
+    int in_x_start = (int)out_x + (PADDING_SIZE_X - FILTER_SIZE_X + 1);
+    int in_y_start = (int)out_y + (PADDING_SIZE_Y - FILTER_SIZE_Y + 1);
+    int in_z_start = (int)out_z + (PADDING_SIZE_Z - FILTER_SIZE_Z + 1);
+
+    // Restrict y filter taps to positions that land on real input rows:
+    // start inside the buffer on a stride-aligned row, then step by the
+    // stride so every visited row stays aligned.
+    uint fy_start = 0;
+    uint fy_end = FILTER_SIZE_Y;
+    uint fy_inc = STRIDE_SIZE_Y;
+    if (in_y_start < 0)
+        fy_start = -in_y_start;
+    else if (in_y_start % STRIDE_SIZE_Y != 0)
+        fy_start = STRIDE_SIZE_Y - in_y_start % STRIDE_SIZE_Y;
+    if (in_y_start + FILTER_SIZE_Y - 1 >= INPUT0_SIZE_Y * STRIDE_SIZE_Y)
+        fy_end = INPUT0_SIZE_Y * STRIDE_SIZE_Y - in_y_start;
+
+    // Same clipping for the z axis.
+    uint fz_start = 0;
+    uint fz_end = FILTER_SIZE_Z;
+    uint fz_inc = STRIDE_SIZE_Z;
+    if (in_z_start < 0)
+        fz_start = -in_z_start;
+    else if (in_z_start % STRIDE_SIZE_Z != 0)
+        fz_start = STRIDE_SIZE_Z - in_z_start % STRIDE_SIZE_Z;
+    if (in_z_start + FILTER_SIZE_Z - 1 >= INPUT0_SIZE_Z * STRIDE_SIZE_Z)
+        fz_end = INPUT0_SIZE_Z * STRIDE_SIZE_Z - in_z_start;
+
+    // Integer accumulators plus dword staging buffers for input and weights.
+    ACCUMULATOR_TYPE acc[TILE_B][TILE_OFM][TILE_X] = { };
+    uint in[TILE_B][TILE_IFM / 4];
+    uint wei[TILE_OFM][TILE_IFM / 4];
+
+    for (uint fz = fz_start; fz < fz_end; fz += fz_inc) {
+        int in_z = in_z_start + fz;
+        uint fixed_in_z = in_z / STRIDE_SIZE_Z;
+
+        for (uint fy = fy_start; fy < fy_end; fy += fy_inc) {
+            int in_y = in_y_start + fy;
+            uint fixed_in_y = in_y / STRIDE_SIZE_Y;
+
+            for (uint fx = 0; fx < FILTER_SIZE_X; fx += 1) {
+                // Each lane handles one x position of the tile (lanes beyond
+                // TILE_X fall back to position 0 and are never stored).
+                int in_x = in_x_start + fx + ((TILE_X == SIMD || sglid < TILE_X) ? sglid : 0);
+                // x taps are not pre-clipped like y/z; out-of-range or
+                // stride-misaligned lanes are flagged and zeroed after load.
+                bool zero_x = false;
+                zero_x |= in_x < 0;
+                zero_x |= in_x >= INPUT0_SIZE_X * STRIDE_SIZE_X;
+                zero_x |= in_x % STRIDE_SIZE_X != 0;
+                in_x = max(in_x, 0);
+                // NOTE(review): clamps to the exclusive bound, so flagged
+                // lanes may compute fixed_in_x == INPUT0_SIZE_X; the loaded
+                // garbage is zeroed below, but the address can touch one
+                // element past the row — confirm the layout's padding covers it.
+                in_x = min(in_x, INPUT0_SIZE_X * STRIDE_SIZE_X);
+                uint fixed_in_x = in_x / STRIDE_SIZE_X;
+
+                // Spatially mirrored filter tap; /4 converts to dword units.
+                uint weights_offset = WEIGHTS_GET_INDEX(group, ofm, 0, FILTER_SIZE_Z - fz - 1, FILTER_SIZE_Y - fy - 1, FILTER_SIZE_X - fx - 1) / 4;
+
+#if INPUT_VALID_TILE_IFM_PITCH
+                // Feature pitch is uniform, so the input offset can be
+                // computed once and advanced incrementally inside the fi loop.
+# if OUTPUT_DIMS <= 4
+                uint input_offset = INPUT0_GET_INDEX(out_b, if_start, fixed_in_y, fixed_in_x) / 4;
+# elif OUTPUT_DIMS == 5
+                uint input_offset = INPUT0_GET_INDEX(out_b, if_start, fixed_in_z, fixed_in_y, fixed_in_x) / 4;
+# endif
+#endif
+
+                for (uint fi = 0; fi < FILTER_IFM_NUM; fi += TILE_IFM) {
+                    // Load weights [TILE_OFM, TILE_IFM, 1, 1]
+                    __attribute__((opencl_unroll_hint))
+                    for (uint of = 0; of < TILE_OFM; ++of) {
+                        uint weights_idx = weights_offset + of * WEIGHTS_IN_TILE_OFM_PITCH / 4;
+                        FUNC_CALL(load_weights_ui)(weights_ui, weights_idx, TILE_IFM / 4, wei[of]);
+                    }
+                    weights_offset += WEIGHTS_TILE_IFM_PITCH / 4;
+
+                    // Load input [TILE_B, TILE_IFM, 1, 1]
+#if !INPUT_VALID_TILE_IFM_PITCH
+                    // Non-uniform feature pitch: recompute the offset per tile.
+# if OUTPUT_DIMS <= 4
+                    uint input_offset = INPUT0_GET_INDEX(out_b, if_start + fi, fixed_in_y, fixed_in_x) / 4;
+# elif OUTPUT_DIMS == 5
+                    uint input_offset = INPUT0_GET_INDEX(out_b, if_start + fi, fixed_in_z, fixed_in_y, fixed_in_x) / 4;
+# endif
+#endif
+                    __attribute__((opencl_unroll_hint))
+                    for (uint ob = 0; ob < TILE_B; ++ob) {
+                        uint input_idx = input_offset + ob * INPUT_IN_TILE_B_PITCH / 4;
+                        FUNC_CALL(load_input_ui)(input_ui, input_idx, TILE_IFM / 4, in[ob]);
+                    }
+#if INPUT_VALID_TILE_IFM_PITCH
+                    input_offset += INPUT_TILE_IFM_PITCH / 4;
+#endif
+                    // Discard values loaded by lanes flagged invalid above.
+                    if (zero_x) {
+                        __attribute__((opencl_unroll_hint))
+                        for (uint ob = 0; ob < TILE_B; ++ob) {
+                            __attribute__((opencl_unroll_hint))
+                            for (uint ifp = 0; ifp < TILE_IFM / 4; ++ifp) {
+                                in[ob][ifp] = 0;
+                            }
+                        }
+                    }
+
+                    // Accumulate: lane tx's input dword is broadcast to the
+                    // whole sub-group via shuffle, so every lane (one output
+                    // feature each) multiplies against the same x position.
+                    __attribute__((opencl_unroll_hint))
+                    for (uint ob = 0; ob < TILE_B; ++ob) {
+                        __attribute__((opencl_unroll_hint))
+                        for (uint of = 0; of < TILE_OFM; ++of) {
+                            __attribute__((opencl_unroll_hint))
+                            for (uint tx = 0; tx < TILE_X; ++tx) {
+                                __attribute__((opencl_unroll_hint))
+                                for (uint imad_it = 0; imad_it < TILE_IFM / 4; ++imad_it) {
+                                    uint in_val = intel_sub_group_shuffle(in[ob][imad_it], tx);
+                                    acc[ob][of][tx] = IMAD(acc[ob][of][tx], AS_INPUT_TYPE4(in_val), AS_FILTER_TYPE4(wei[of][imad_it]));
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Widen integer accumulators to the activation type before bias/fusing.
+    ACTIVATION_TYPE dequantized[TILE_B][TILE_OFM][TILE_X];
+    __attribute__((opencl_unroll_hint))
+    for (uint ob = 0; ob < TILE_B; ++ob) {
+        __attribute__((opencl_unroll_hint))
+        for (uint of = 0; of < TILE_OFM; ++of) {
+            __attribute__((opencl_unroll_hint))
+            for (uint tx = 0; tx < TILE_X; ++tx) {
+                dequantized[ob][of][tx] = TO_ACTIVATION_TYPE(acc[ob][of][tx]);
+            }
+        }
+    }
+
+#if BIAS_TERM
+    // One bias value per output feature, shared across batch and x.
+    __attribute__((opencl_unroll_hint))
+    for (uint of = 0; of < TILE_OFM; ++of) {
+        BIAS_TYPE bias_val = bias[out_f + of * SIMD];
+        __attribute__((opencl_unroll_hint))
+        for (uint ob = 0; ob < TILE_B; ++ob) {
+            __attribute__((opencl_unroll_hint))
+            for (uint tx = 0; tx < TILE_X; ++tx) {
+                dequantized[ob][of][tx] += TO_ACTIVATION_TYPE(bias_val);
+            }
+        }
+    }
+#endif
+
+    // Apply fused ops (or a plain type conversion) to produce final outputs.
+    OUTPUT_TYPE result[TILE_B][TILE_OFM][TILE_X];
+    __attribute__((opencl_unroll_hint))
+    for (uint of = 0; of < TILE_OFM; ++of) {
+#if FUSED_OPS_CAN_USE_PRELOAD
+        FUSED_OPS_PRELOAD;
+#endif
+        __attribute__((opencl_unroll_hint))
+        for (uint ob = 0; ob < TILE_B; ++ob) {
+            __attribute__((opencl_unroll_hint))
+            for (uint tx = 0; tx < TILE_X; ++tx) {
+#if HAS_FUSED_OPS
+# if FUSED_OPS_CAN_USE_PRELOAD
+                FUSED_OPS_CALC;
+# else
+                FUSED_OPS;
+# endif
+                result[ob][of][tx] = FUSED_OPS_RESULT;
+#else
+                result[ob][of][tx] = TO_OUTPUT_TYPE(dequantized[ob][of][tx]);
+#endif
+            }
+        }
+    }
+
+    // Tail handling: does this tile cross the x or feature boundary?
+    bool leftovers_x = OUTPUT_SIZE_X % TILE_X != 0 && out_x + TILE_X >= OUTPUT_SIZE_X;
+    bool leftovers_f = OUTPUT_FEATURE_NUM % SIMD != 0 && out_f + SIMD >= OUTPUT_FEATURE_NUM;
+
+#if OUTPUT_NAIVE_STORE
+    // Scalar stores through the generic index macro; works for any layout.
+    __attribute__((opencl_unroll_hint))
+    for (uint ob = 0; ob < TILE_B; ++ob) {
+        __attribute__((opencl_unroll_hint))
+        for (uint of = 0; of < TILE_OFM; ++of) {
+            __attribute__((opencl_unroll_hint))
+            for (uint tx = 0; tx < TILE_X; ++tx) {
+                if ((leftovers_x && tx >= OUTPUT_SIZE_X % TILE_X) ||
+                    (leftovers_f && out_f + of * SIMD >= OUTPUT_FEATURE_NUM))
+                    break;
+#if OUTPUT_DIMS <= 4
+                uint output_idx = OUTPUT_GET_INDEX(out_b + ob, out_f + of * SIMD, out_y, out_x + tx);
+#elif OUTPUT_DIMS == 5
+                uint output_idx = OUTPUT_GET_INDEX(out_b + ob, out_f + of * SIMD, out_z, out_y, out_x + tx);
+#endif
+                output[output_idx] = result[ob][of][tx];
+            }
+        }
+    }
+#elif OUTPUT_BLOCK_X_STORE
+    // Sub-group block stores along x; falls back to guarded scalar stores
+    // only when the feature tail makes block writes unsafe.
+    __attribute__((opencl_unroll_hint))
+    for (uint ob = 0; ob < TILE_B; ++ob) {
+        __attribute__((opencl_unroll_hint))
+        for (uint of = 0; of < TILE_OFM; ++of) {
+#if OUTPUT_DIMS <= 4
+            uint output_idx = OUTPUT_GET_INDEX(out_b + ob, out_fg + of * SIMD, out_y, out_x);
+#elif OUTPUT_DIMS == 5
+            uint output_idx = OUTPUT_GET_INDEX(out_b + ob, out_fg + of * SIMD, out_z, out_y, out_x);
+#endif
+            if (!leftovers_x && !leftovers_f) {
+                FUNC_CALL(store_output)(output, output_idx, TILE_X, result[ob][of]);
+            } else if (!leftovers_f) {
+                FUNC_CALL(store_output)(output, output_idx, OUTPUT_SIZE_X % TILE_X, result[ob][of]);
+            } else {
+                __attribute__((opencl_unroll_hint))
+                for (uint tx = 0; tx < TILE_X; ++tx) {
+                    if (out_f + of * SIMD < OUTPUT_FEATURE_NUM && out_x + tx < OUTPUT_SIZE_X) {
+                        output[output_idx + sglid + tx * SIMD] = result[ob][of][tx];
+                    }
+                }
+            }
+        }
+    }
+#endif
+}
+
+#undef FILTER_TYPE4
+#undef INPUT_TYPE4
+#undef AS_FILTER_TYPE4
+#undef AS_INPUT_TYPE4
+
+#undef WEIGHTS_GET_INDEX
+#undef WEIGHTS_TILE_IFM_PITCH
+#undef WEIGHTS_IN_TILE_OFM_PITCH
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/data_types.cl"
+
+// Integer helpers: ceiling division and round-up to a multiple of b.
+#define CEIL_DIV(a, b) (((a) + ((b) - 1)) / (b))
+#define ALIGN(a, b) (CEIL_DIV(a, b) * (b))
+
+// VEC_TO_ARR_N: scatter an OpenCL N-component vector into arr starting at
+// arr[idx], recursively splitting on the built-in .lo/.hi vector halves.
+// (No comments inside the macros: '//' would swallow the continuation '\'.)
+#define VEC_TO_ARR_1(var, arr, idx) \
+    arr[idx] = var
+#define VEC_TO_ARR_2(vec, arr, idx) \
+    VEC_TO_ARR_1((vec).lo, arr, idx); \
+    VEC_TO_ARR_1((vec).hi, arr, (idx) + 1)
+#define VEC_TO_ARR_4(vec, arr, idx) \
+    VEC_TO_ARR_2((vec).lo, arr, idx); \
+    VEC_TO_ARR_2((vec).hi, arr, (idx) + 2)
+#define VEC_TO_ARR_8(vec, arr, idx) \
+    VEC_TO_ARR_4((vec).lo, arr, idx); \
+    VEC_TO_ARR_4((vec).hi, arr, (idx) + 4)
+#define VEC_TO_ARR_16(vec, arr, idx) \
+    VEC_TO_ARR_8((vec).lo, arr, idx); \
+    VEC_TO_ARR_8((vec).hi, arr, (idx) + 8)
+
+// ARR_TO_VEC_N: the inverse — gather N array elements starting at arr[idx]
+// into the components of an N-wide vector.
+#define ARR_TO_VEC_1(arr, var, idx) \
+    var = arr[idx]
+#define ARR_TO_VEC_2(arr, vec, idx) \
+    ARR_TO_VEC_1(arr, (vec).lo, idx); \
+    ARR_TO_VEC_1(arr, (vec).hi, (idx) + 1)
+#define ARR_TO_VEC_4(arr, vec, idx) \
+    ARR_TO_VEC_2(arr, (vec).lo, idx); \
+    ARR_TO_VEC_2(arr, (vec).hi, (idx) + 2)
+#define ARR_TO_VEC_8(arr, vec, idx) \
+    ARR_TO_VEC_4(arr, (vec).lo, idx); \
+    ARR_TO_VEC_4(arr, (vec).hi, (idx) + 4)
+#define ARR_TO_VEC_16(arr, vec, idx) \
+    ARR_TO_VEC_8(arr, (vec).lo, idx); \
+    ARR_TO_VEC_8(arr, (vec).hi, (idx) + 8)
+
+// DECLARE_LOAD_CONTINOUS_16 (sic — "CONTINOUS" is an established misspelling
+// kept for compatibility with existing call sites): declares a per-work-item
+// function copying `size` consecutive elements from src[offset..] into the
+// private array dst, using the widest plain vector loads available (16, then
+// 8/4/2/1 for the tail).  No sub-group cooperation — each work item loads
+// from its own address.  Requires src + offset to be suitably aligned for
+// the vector loads; confirm against the caller's layout.
+#define DECLARE_LOAD_CONTINOUS_16(name, type) \
+inline void FUNC(name)(const __global type* src, uint offset, uint size, type* dst) { \
+    uint i = 0; \
+    for (; i + 16 <= size; i += 16) { \
+        MAKE_VECTOR_TYPE(type, 16) tmp = ((const __global MAKE_VECTOR_TYPE(type, 16)*)(src + offset + i))[0]; \
+        VEC_TO_ARR_16(tmp, dst, i); \
+    } \
+    if (size % 16 >= 8) { \
+        MAKE_VECTOR_TYPE(type, 8) tmp = ((const __global MAKE_VECTOR_TYPE(type, 8)*)(src + offset + i))[0]; \
+        VEC_TO_ARR_8(tmp, dst, i); \
+        i += 8; \
+    } \
+    if (size % 8 >= 4) { \
+        MAKE_VECTOR_TYPE(type, 4) tmp = ((const __global MAKE_VECTOR_TYPE(type, 4)*)(src + offset + i))[0]; \
+        VEC_TO_ARR_4(tmp, dst, i); \
+        i += 4; \
+    } \
+    if (size % 4 >= 2) { \
+        MAKE_VECTOR_TYPE(type, 2) tmp = ((const __global MAKE_VECTOR_TYPE(type, 2)*)(src + offset + i))[0]; \
+        VEC_TO_ARR_2(tmp, dst, i); \
+        i += 2; \
+    } \
+    if (size % 2 == 1) { \
+        dst[i] = src[offset + i]; \
+    } \
+}
+
+// Same as above but with 4-wide loads as the widest step (for types/usages
+// where 16-wide vectors are unnecessary or unavailable).
+#define DECLARE_LOAD_CONTINOUS_4(name, type) \
+inline void FUNC(name)(const __global type* src, uint offset, uint size, type* dst) { \
+    uint i = 0; \
+    for (; i + 4 <= size; i += 4) { \
+        MAKE_VECTOR_TYPE(type, 4) tmp = ((const __global MAKE_VECTOR_TYPE(type, 4)*)(src + offset + i))[0]; \
+        VEC_TO_ARR_4(tmp, dst, i); \
+    } \
+    if (size % 4 >= 2) { \
+        MAKE_VECTOR_TYPE(type, 2) tmp = ((const __global MAKE_VECTOR_TYPE(type, 2)*)(src + offset + i))[0]; \
+        VEC_TO_ARR_2(tmp, dst, i); \
+        i += 2; \
+    } \
+    if (size % 2 == 1) { \
+        dst[i] = src[offset + i]; \
+    } \
+}
+
+// DECLARE_STORE_BLOCK_N: declares a sub-group cooperative store writing
+// `size` values per work item from the private array src.  Each BLOCK_WRITEN
+// call stores one element per work item of the sub-group, so the global
+// offset advances by i * sub-group size, not by i.  Must be called by all
+// work items of the sub-group with the same offset/size (block-write
+// semantics).  Variants differ only in the widest vector step (16/8/4).
+#define DECLARE_STORE_BLOCK_16(name, type) \
+inline void FUNC(name)(__global type* dst, uint offset, uint size, type* src) { \
+    uint i = 0; \
+    const uint sg_size = get_max_sub_group_size(); \
+    for (; i + 16 <= size; i += 16) { \
+        MAKE_VECTOR_TYPE(type, 16) tmp; \
+        ARR_TO_VEC_16(src, tmp, i); \
+        BLOCK_WRITEN(type, 16, dst, offset + i * sg_size, tmp); \
+    } \
+    if (size % 16 >= 8) { \
+        MAKE_VECTOR_TYPE(type, 8) tmp; \
+        ARR_TO_VEC_8(src, tmp, i); \
+        BLOCK_WRITEN(type, 8, dst, offset + i * sg_size, tmp); \
+        i += 8; \
+    } \
+    if (size % 8 >= 4) { \
+        MAKE_VECTOR_TYPE(type, 4) tmp; \
+        ARR_TO_VEC_4(src, tmp, i); \
+        BLOCK_WRITEN(type, 4, dst, offset + i * sg_size, tmp); \
+        i += 4; \
+    } \
+    if (size % 4 >= 2) { \
+        MAKE_VECTOR_TYPE(type, 2) tmp; \
+        ARR_TO_VEC_2(src, tmp, i); \
+        BLOCK_WRITEN(type, 2, dst, offset + i * sg_size, tmp); \
+        i += 2; \
+    } \
+    if (size % 2 == 1) { \
+        type tmp = src[i]; \
+        BLOCK_WRITEN(type, 1, dst, offset + i * sg_size, tmp); \
+    } \
+}
+
+// 8-wide variant: same contract, widest step is 8.
+#define DECLARE_STORE_BLOCK_8(name, type) \
+inline void FUNC(name)(__global type* dst, uint offset, uint size, type* src) { \
+    uint i = 0; \
+    const uint sg_size = get_max_sub_group_size(); \
+    for (; i + 8 <= size; i += 8) { \
+        MAKE_VECTOR_TYPE(type, 8) tmp; \
+        ARR_TO_VEC_8(src, tmp, i); \
+        BLOCK_WRITEN(type, 8, dst, offset + i * sg_size, tmp); \
+    } \
+    if (size % 8 >= 4) { \
+        MAKE_VECTOR_TYPE(type, 4) tmp; \
+        ARR_TO_VEC_4(src, tmp, i); \
+        BLOCK_WRITEN(type, 4, dst, offset + i * sg_size, tmp); \
+        i += 4; \
+    } \
+    if (size % 4 >= 2) { \
+        MAKE_VECTOR_TYPE(type, 2) tmp; \
+        ARR_TO_VEC_2(src, tmp, i); \
+        BLOCK_WRITEN(type, 2, dst, offset + i * sg_size, tmp); \
+        i += 2; \
+    } \
+    if (size % 2 == 1) { \
+        type tmp = src[i]; \
+        BLOCK_WRITEN(type, 1, dst, offset + i * sg_size, tmp); \
+    } \
+}
+
+// 4-wide variant: same contract, widest step is 4.
+#define DECLARE_STORE_BLOCK_4(name, type) \
+inline void FUNC(name)(__global type* dst, uint offset, uint size, type* src) { \
+    uint i = 0; \
+    const uint sg_size = get_max_sub_group_size(); \
+    for (; i + 4 <= size; i += 4) { \
+        MAKE_VECTOR_TYPE(type, 4) tmp; \
+        ARR_TO_VEC_4(src, tmp, i); \
+        BLOCK_WRITEN(type, 4, dst, offset + i * sg_size, tmp); \
+    } \
+    if (size % 4 >= 2) { \
+        MAKE_VECTOR_TYPE(type, 2) tmp; \
+        ARR_TO_VEC_2(src, tmp, i); \
+        BLOCK_WRITEN(type, 2, dst, offset + i * sg_size, tmp); \
+        i += 2; \
+    } \
+    if (size % 2 == 1) { \
+        type tmp = src[i]; \
+        BLOCK_WRITEN(type, 1, dst, offset + i * sg_size, tmp); \
+    } \
+}
+
+#define DECLARE_READ_BLOCK_16(name, type) \
+inline void FUNC(name)(const __global type* src, uint offset, uint size, type* dst) { \
+ uint i = 0; \
+ const uint sg_size = get_max_sub_group_size(); \
+ for (; i + 16 <= size; i += 16) { \
+ MAKE_VECTOR_TYPE(type, 16) tmp = BLOCK_READN(type, 16, src, offset + i * sg_size); \
+ VEC_TO_ARR_16(tmp, dst, i); \
+ } \
+ if (size % 16 >= 8) { \
+ MAKE_VECTOR_TYPE(type, 8) tmp = BLOCK_READN(type, 8, src, offset + i * sg_size); \
+ VEC_TO_ARR_8(tmp, dst, i); \
+ i += 8; \
+ } \
+ if (size % 8 >= 4) { \
+ MAKE_VECTOR_TYPE(type, 4) tmp = BLOCK_READN(type, 4, src, offset + i * sg_size); \
+ VEC_TO_ARR_4(tmp, dst, i); \
+ i += 4; \
+ } \
+ if (size % 4 >= 2) { \
+ MAKE_VECTOR_TYPE(type, 2) tmp = BLOCK_READN(type, 2, src, offset + i * sg_size); \
+ VEC_TO_ARR_2(tmp, dst, i); \
+ i += 2; \
+ } \
+ if (size % 2 == 1) { \
+ type tmp = BLOCK_READN(type, 1, src, offset + i * sg_size); \
+ dst[i] = tmp; \
+ } \
+}
+
+#define DECLARE_READ_BLOCK_8(name, type) \
+inline void FUNC(name)(const __global type* src, uint offset, uint size, type* dst) { \
+ uint i = 0; \
+ const uint sg_size = get_max_sub_group_size(); \
+ for (; i + 8 <= size; i += 8) { \
+ MAKE_VECTOR_TYPE(type, 8) tmp = BLOCK_READN(type, 8, src, offset + i * sg_size); \
+ VEC_TO_ARR_8(tmp, dst, i); \
+ } \
+ if (size % 8 >= 4) { \
+ MAKE_VECTOR_TYPE(type, 4) tmp = BLOCK_READN(type, 4, src, offset + i * sg_size); \
+ VEC_TO_ARR_4(tmp, dst, i); \
+ i += 4; \
+ } \
+ if (size % 4 >= 2) { \
+ MAKE_VECTOR_TYPE(type, 2) tmp = BLOCK_READN(type, 2, src, offset + i * sg_size); \
+ VEC_TO_ARR_2(tmp, dst, i); \
+ i += 2; \
+ } \
+ if (size % 2 == 1) { \
+ type tmp = BLOCK_READN(type, 1, src, offset + i * sg_size); \
+ dst[i] = tmp; \
+ } \
+}
+
+#define DECLARE_READ_BLOCK_4(name, type) \
+inline void FUNC(name)(const __global type* src, uint offset, uint size, type* dst) { \
+ uint i = 0; \
+ const uint sg_size = get_max_sub_group_size(); \
+ for (; i + 4 <= size; i += 4) { \
+ MAKE_VECTOR_TYPE(type, 4) tmp = BLOCK_READN(type, 4, src, offset + i * sg_size); \
+ VEC_TO_ARR_4(tmp, dst, i); \
+ } \
+ if (size % 4 >= 2) { \
+ MAKE_VECTOR_TYPE(type, 2) tmp = BLOCK_READN(type, 2, src, offset + i * sg_size); \
+ VEC_TO_ARR_2(tmp, dst, i); \
+ i += 2; \
+ } \
+ if (size % 2 == 1) { \
+ type tmp = BLOCK_READN(type, 1, src, offset + i * sg_size); \
+ dst[i] = tmp; \
+ } \
+}
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/fetch.cl"
+#include "include/imad.cl"
+#include "include/data_types.cl"
+
+#include "deconvolution_gpu_imad_common.cl"
+
+DECLARE_LOAD_CONTINOUS_4(load_input_ui, uint)
+DECLARE_LOAD_CONTINOUS_4(load_weights_ui, uint)
+
+#define FILTER_TYPE4 MAKE_VECTOR_TYPE(FILTER_TYPE, 4)
+#define INPUT_TYPE4 MAKE_VECTOR_TYPE(INPUT0_TYPE, 4)
+
+#define AS_FILTER_TYPE4 CAT(as_, FILTER_TYPE4)
+#define AS_INPUT_TYPE4 CAT(as_, INPUT_TYPE4)
+
+#define WEIGHTS_GET_INDEX(g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV32_ISV4_INDEX(FILTER, g, o, i, z, y, x)
+
+KERNEL(deconvolution_gpu_imad_ref)(
+ const __global INPUT0_TYPE* input,
+ __global OUTPUT_TYPE* restrict output,
+ const __global FILTER_TYPE* weights,
+#if BIAS_TERM
+ const __global BIAS_TYPE* bias,
+#endif
+#if HAS_FUSED_OPS_DECLS
+ FUSED_OPS_DECLS,
+#endif
+ uint split_idx
+) {
+ const __global uint* input_ui = (const __global uint*)input;
+ const __global uint* weights_ui = (const __global uint*)weights;
+
+ uint out_b = get_global_id(2);
+ uint out_f = get_global_id(0);
+ uint out_x = (uint)get_global_id(1) % OUTPUT_SIZE_X;
+#if OUTPUT_DIMS <= 4
+ uint out_y = (uint)get_global_id(1) / OUTPUT_SIZE_X;
+ uint out_z = 0;
+#elif OUTPUT_DIMS == 5
+ uint out_y = (uint)get_global_id(1) / OUTPUT_SIZE_X % OUTPUT_SIZE_Y;
+ uint out_z = (uint)get_global_id(1) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+#endif
+
+#if GROUPED
+ uint group = out_f / FILTER_OFM_NUM;
+ uint ofm = out_f % FILTER_OFM_NUM;
+#else
+ uint group = 0;
+ uint ofm = out_f;
+#endif
+ uint if_start = group * FILTER_IFM_NUM;
+
+ int in_x_start = (int)out_x + (PADDING_SIZE_X - FILTER_SIZE_X + 1);
+ int in_y_start = (int)out_y + (PADDING_SIZE_Y - FILTER_SIZE_Y + 1);
+ int in_z_start = (int)out_z + (PADDING_SIZE_Z - FILTER_SIZE_Z + 1);
+
+ uint fx_start = 0;
+ uint fx_end = FILTER_SIZE_X;
+ uint fx_inc = STRIDE_SIZE_X;
+ if (in_x_start < 0)
+ fx_start = -in_x_start;
+ else if (in_x_start % STRIDE_SIZE_X != 0)
+ fx_start = STRIDE_SIZE_X - in_x_start % STRIDE_SIZE_X;
+ if (in_x_start + FILTER_SIZE_X - 1 >= INPUT0_SIZE_X * STRIDE_SIZE_X)
+ fx_end = INPUT0_SIZE_X * STRIDE_SIZE_X - in_x_start;
+
+ uint fy_start = 0;
+ uint fy_end = FILTER_SIZE_Y;
+ uint fy_inc = STRIDE_SIZE_Y;
+ if (in_y_start < 0)
+ fy_start = -in_y_start;
+ else if (in_y_start % STRIDE_SIZE_Y != 0)
+ fy_start = STRIDE_SIZE_Y - in_y_start % STRIDE_SIZE_Y;
+ if (in_y_start + FILTER_SIZE_Y - 1 >= INPUT0_SIZE_Y * STRIDE_SIZE_Y)
+ fy_end = INPUT0_SIZE_Y * STRIDE_SIZE_Y - in_y_start;
+
+ uint fz_start = 0;
+ uint fz_end = FILTER_SIZE_Z;
+ uint fz_inc = STRIDE_SIZE_Z;
+ if (in_z_start < 0)
+ fz_start = -in_z_start;
+ else if (in_z_start % STRIDE_SIZE_Z != 0)
+ fz_start = STRIDE_SIZE_Z - in_z_start % STRIDE_SIZE_Z;
+ if (in_z_start + FILTER_SIZE_Z - 1 >= INPUT0_SIZE_Z * STRIDE_SIZE_Z)
+ fz_end = INPUT0_SIZE_Z * STRIDE_SIZE_Z - in_z_start;
+
+ ACCUMULATOR_TYPE acc = 0;
+ uint in[TILE_IFM / 4];
+ uint wei[TILE_IFM / 4];
+
+ for (uint fz = fz_start; fz < fz_end; fz += fz_inc) {
+ int in_z = in_z_start + fz;
+ uint fixed_in_z = in_z / STRIDE_SIZE_Z;
+
+ for (uint fy = fy_start; fy < fy_end; fy += fy_inc) {
+ int in_y = in_y_start + fy;
+ uint fixed_in_y = in_y / STRIDE_SIZE_Y;
+
+ for (uint fx = fx_start; fx < fx_end; fx += fx_inc) {
+ int in_x = in_x_start + fx;
+ uint fixed_in_x = in_x / STRIDE_SIZE_X;
+
+ for (uint fi = 0; fi < FILTER_IFM_NUM; fi += TILE_IFM) {
+ // Load weights [1, TILE_IFM, 1, 1]
+ uint weights_idx = WEIGHTS_GET_INDEX(group, ofm, fi, FILTER_SIZE_Z - fz - 1, FILTER_SIZE_Y - fy - 1, FILTER_SIZE_X - fx - 1);
+ FUNC_CALL(load_weights_ui)(weights_ui, weights_idx / 4, TILE_IFM / 4, wei);
+
+ // Load input [1, TILE_IFM, 1, 1]
+#if FILTER_GROUPS_NUM == 1 || FILTER_IFM_NUM % TILE_IFM == 0
+# if OUTPUT_DIMS <= 4
+ uint input_idx = INPUT0_GET_INDEX(out_b, fi + if_start, fixed_in_y, fixed_in_x);
+# elif OUTPUT_DIMS == 5
+ uint input_idx = INPUT0_GET_INDEX(out_b, fi + if_start, fixed_in_z, fixed_in_y, fixed_in_x);
+# endif
+ FUNC_CALL(load_input_ui)(input_ui, input_idx / 4, TILE_IFM / 4, in);
+#else
+ for (uint tifm = 0; tifm < TILE_IFM; ++tifm) {
+# if OUTPUT_DIMS <= 4
+ uint input_idx = INPUT0_GET_INDEX(out_b, fi + if_start + tifm, fixed_in_y, fixed_in_x);
+# elif OUTPUT_DIMS == 5
+ uint input_idx = INPUT0_GET_INDEX(out_b, fi + if_start + tifm, fixed_in_z, fixed_in_y, fixed_in_x);
+# endif
+ ((INPUT0_TYPE*)(in))[tifm] = input[input_idx];
+ }
+#endif
+
+ __attribute__((opencl_unroll_hint))
+ for (uint imad_it = 0; imad_it < TILE_IFM / 4; ++imad_it) {
+ acc = IMAD(acc, AS_INPUT_TYPE4(in[imad_it]), AS_FILTER_TYPE4(wei[imad_it]));
+ }
+ }
+ }
+ }
+ }
+
+ ACTIVATION_TYPE dequantized;
+ dequantized = TO_ACTIVATION_TYPE(acc);
+
+#if BIAS_TERM
+ BIAS_TYPE bias_val = bias[out_f];
+ dequantized += TO_ACTIVATION_TYPE(bias_val);
+#endif
+
+ OUTPUT_TYPE result;
+#if HAS_FUSED_OPS
+ FUSED_OPS;
+ result = FUSED_OPS_RESULT;
+#else
+ result = TO_OUTPUT_TYPE(dequantized);
+#endif
+
+#if OUTPUT_DIMS <= 4
+ uint output_idx = OUTPUT_GET_INDEX(out_b, out_f, out_y, out_x);
+#elif OUTPUT_DIMS == 5
+ uint output_idx = OUTPUT_GET_INDEX(out_b, out_f, out_z, out_y, out_x);
+#endif
+ output[output_idx] = result;
+}
+
+#undef FILTER_TYPE4
+#undef INPUT_TYPE4
+#undef AS_FILTER_TYPE4
+#undef AS_INPUT_TYPE4
+
+#undef WEIGHTS_GET_INDEX
#if BIAS_TERM
const __global BIAS_TYPE* bias,
#endif
- uint split_idx
-#if FUSED_ELTWISE
- , const __global UNIT_TYPE* fuse_input
+#if HAS_FUSED_OPS_DECLS
+ FUSED_OPS_DECLS,
#endif
- )
+ uint split_idx
+ )
{
- UNIT_TYPE result = UNIT_VAL_ZERO;
+ ACCUMULATOR_TYPE acc = ACCUMULATOR_VAL_ZERO;
#if DIM_ORDER_XYBF == 1
const uint out_x = get_global_id(0);
uint fixed_input_offset_x = (uint)input_offset_x / STRIDE_SIZE_X;
uint fixed_input_offset_y = (uint)input_offset_y / STRIDE_SIZE_Y;
uint fixed_input_offset_z = (uint)input_offset_z / STRIDE_SIZE_Z;
-#if OUTPUT_LAYOUT_B_FS_ZYX_FSV16 || OUTPUT_LAYOUT_B_FS_YX_FSV16 || OUTPUT_LAYOUT_BS_FS_ZYX_BSV16_FSV16
+
uint input_idx;
-#else
- uint input_idx = input_offset + (uint)fixed_input_offset_x*INPUT0_X_PITCH + (uint)fixed_input_offset_y*INPUT0_Y_PITCH + (uint)fixed_input_offset_z*INPUT0_Z_PITCH;
+#if INPUT0_SIMPLE
+ input_idx = input_offset + (uint)fixed_input_offset_x*INPUT0_X_PITCH + (uint)fixed_input_offset_y*INPUT0_Y_PITCH + (uint)fixed_input_offset_z*INPUT0_Z_PITCH;
#endif
+
#if GRADIENT
uint filter_idx = filter_offset + of*FILTER_IFM_PITCH + (FILTER_SIZE_Z - k - 1)*FILTER_Z_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
- for (uint h = 0; h < FILTER_OFM_NUM; h++)
- {
-#if INPUT0_LAYOUT_B_FS_ZYX_FSV16 || INPUT0_LAYOUT_BS_FS_ZYX_BSV16_FSV16
- input_idx = INPUT0_GET_INDEX(batch_offset, h + g*FILTER_IFM_NUM, fixed_input_offset_z, fixed_input_offset_y, fixed_input_offset_x);
-#elif INPUT0_LAYOUT_BS_FS_YX_FSV16
+ for (uint h = 0; h < FILTER_OFM_NUM; h++) {
+#if !INPUT0_SIMPLE
+# if INPUT0_DIMS <= 4
input_idx = INPUT0_GET_INDEX(batch_offset, h + g*FILTER_IFM_NUM, fixed_input_offset_y, fixed_input_offset_x);
+# elif INPUT0_DIMS == 5
+ input_idx = INPUT0_GET_INDEX(batch_offset, h + g*FILTER_IFM_NUM, fixed_input_offset_z, fixed_input_offset_y, fixed_input_offset_x);
+# endif
#endif
- result = fma(input[input_idx], filter[filter_idx], result);
+
+ acc += TO_ACCUMULATOR_TYPE(input[input_idx]) * TO_ACCUMULATOR_TYPE(filter[filter_idx]);
filter_idx += FILTER_OFM_PITCH;
-#if !INPUT0_LAYOUT_B_FS_ZYX_FSV16 && !INPUT0_LAYOUT_BS_FS_ZYX_BSV16_FSV16 && !INPUT0_LAYOUT_B_FS_YX_FSV16
+#if INPUT0_SIMPLE
input_idx += INPUT0_FEATURE_PITCH;
#endif
}
-#else
+#else // GRADIENT
uint filter_idx = filter_offset + of*FILTER_OFM_PITCH + (FILTER_SIZE_Z - k - 1)*FILTER_Z_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
- for (uint h = 0; h < FILTER_IFM_NUM; h++)
- {
-#if OUTPUT_LAYOUT_B_FS_ZYX_FSV16 || OUTPUT_LAYOUT_BS_FS_ZYX_BSV16_FSV16
- input_idx = INPUT0_GET_INDEX(batch_offset, h + g*FILTER_IFM_NUM, fixed_input_offset_z, fixed_input_offset_y, fixed_input_offset_x);
-#elif OUTPUT_LAYOUT_B_FS_YX_FSV16
+ for (uint h = 0; h < FILTER_IFM_NUM; h++) {
+#if !INPUT0_SIMPLE
+# if INPUT0_DIMS <= 4
input_idx = INPUT0_GET_INDEX(batch_offset, h + g*FILTER_IFM_NUM, fixed_input_offset_y, fixed_input_offset_x);
+# elif INPUT0_DIMS == 5
+ input_idx = INPUT0_GET_INDEX(batch_offset, h + g*FILTER_IFM_NUM, fixed_input_offset_z, fixed_input_offset_y, fixed_input_offset_x);
+# endif
#endif
- result = fma(input[input_idx], filter[filter_idx], result);
+
+ acc += TO_ACCUMULATOR_TYPE(input[input_idx]) * TO_ACCUMULATOR_TYPE(filter[filter_idx]);
filter_idx += FILTER_IFM_PITCH;
-#if !OUTPUT_LAYOUT_B_FS_ZYX_FSV16 && !OUTPUT_LAYOUT_B_FS_YX_FSV16 && !OUTPUT_LAYOUT_BS_FS_ZYX_BSV16_FSV16
+#if INPUT0_SIMPLE
input_idx += INPUT0_FEATURE_PITCH;
#endif
}
-#endif
+#endif // GRADIENT
}
}
}
}
}
+ ACTIVATION_TYPE pre_activation = TO_ACTIVATION_TYPE(acc);
#if BIAS_TERM
- result += bias[ofm_offset];
-#endif
- const uint out_split_offset = g * OUTPUT_FEATURE_PITCH * FILTER_OFM_NUM;
-#if OUTPUT_LAYOUT_B_FS_ZYX_FSV16 || OUTPUT_LAYOUT_BS_FS_ZYX_BSV16_FSV16
- const uint dst_index = OUTPUT_OFFSET + OUTPUT_GET_INDEX(batch_offset, g * FILTER_OFM_NUM + of, out_z, out_y, out_x);
-#elif OUTPUT_LAYOUT_B_FS_YX_FSV16
- const uint dst_index = OUTPUT_OFFSET + OUTPUT_GET_INDEX(batch_offset, g * FILTER_OFM_NUM + of, out_y, out_x);
-#else
- const uint dst_index = OUTPUT_OFFSET + out_split_offset + batch_offset*OUTPUT_BATCH_PITCH + of*OUTPUT_FEATURE_PITCH + out_z*OUTPUT_Z_PITCH + out_y*OUTPUT_Y_PITCH + out_x*OUTPUT_X_PITCH;
+ pre_activation += TO_ACTIVATION_TYPE(bias[ofm_offset]);
#endif
-#if FUSED_ELTWISE
-#if OUTPUT_LAYOUT_B_FS_ZYX_FSV16 || OUTPUT_LAYOUT_BS_FS_ZYX_BSV16_FSV16
- const uint fused_index = INPUT1_OFFSET + INPUT1_GET_INDEX(batch_offset, g * FILTER_OFM_NUM + of, out_z, out_y, out_x);
-#elif OUTPUT_LAYOUT_B_FS_YX_FSV16
- const uint fused_index = INPUT1_OFFSET + INPUT1_GET_INDEX(batch_offset, g * FILTER_OFM_NUM + of, out_y, out_x);
-#else
- const uint fused_index = INPUT1_OFFSET + split_idx * INPUT1_FEATURE_PITCH * FILTER_OFM_NUM + batch_offset*INPUT1_BATCH_PITCH + of*INPUT1_FEATURE_PITCH + out_z*INPUT1_Z_PITCH + out_y*INPUT1_Y_PITCH + out_x*INPUT1_X_PITCH;
-#endif
-#if !GRADIENT
- output[dst_index] = ACTIVATION(result + fuse_input[fused_index], ACTIVATION_PARAMS);
+ ACTIVATION_TYPE post_activation = ACTIVATION(pre_activation, ACTIVATION_PARAMS);
+
+ OUTPUT_TYPE result;
+#if HAS_FUSED_OPS
+ FUSED_OPS;
+ result = FUSED_OPS_RESULT;
#else
- output[dst_index] = result + fuse_input[fused_index];
+ result = TO_OUTPUT_TYPE(post_activation);
#endif
+#if OUTPUT_DIMS <= 4
+ const uint dst_index = OUTPUT_GET_INDEX(batch_offset, g * FILTER_OFM_NUM + of, out_y, out_x);
+#elif OUTPUT_DIMS == 5
+ const uint dst_index = OUTPUT_GET_INDEX(batch_offset, g * FILTER_OFM_NUM + of, out_z, out_y, out_x);
#else
- output[dst_index] = ACTIVATION(result, ACTIVATION_PARAMS);
+# error deconvolution_gpu_ref.cl - Unsupported number of output dimensions.
#endif
+ output[dst_index] = result;
}
-
-#undef ACTIVATION
*******************************************************************************/
#include "ocl_types.h"
+#include "include/fetch.cl"
+#include "include/data_types.cl"
#if ID > 1
#define CASE_3D 1
#endif
KERNEL(gen9_common_conv_bwd_data_kernel)(
const __global DATA_T *diff_dst,
- __global DATA_T *diff_src,
+ __global DATA_T * restrict diff_src,
const __global DATA_T *wei,
#if WITH_BIAS
const __global DATA_T *bias,
#endif
- uint split_idx)
+#if HAS_FUSED_OPS_DECLS
+ FUSED_OPS_DECLS,
+#endif
+ uint split_idx
+ )
{
const int input_offset = (INPUT0_PAD_BEFORE_FEATURE_NUM / OC_BLOCK) * OD_FULL * OH_FULL * OW_FULL * OC_BLOCK * MB_BLOCK +
(INPUT0_PAD_BEFORE_SIZE_Z) * OH_FULL * OW_FULL * OC_BLOCK * MB_BLOCK +
blockC00 = ACTIVATION(blockC00, ACTIVATION_PARAMS);
blockC01 = ACTIVATION(blockC01, ACTIVATION_PARAMS);
+#if HAS_FUSED_OPS
+ {
+ FUSED_OPS_BLOCK_C00;
+ blockC00 = FUSED_OPS_RESULT_BLOCK_C00;
+ }
+ {
+ FUSED_OPS_BLOCK_C01;
+ blockC01 = FUSED_OPS_RESULT_BLOCK_C01;
+ }
+#endif
+
SAVE_SRC_DIFF(blockC00, src_write0, 0);
SAVE_SRC_DIFF(blockC01, src_write0, 8);
for (int i = 0; i < IW_BLOCK; i++) {
blockC00[i] = ACTIVATION(blockC00[i], ACTIVATION_PARAMS);
if (iw + i >= IW) continue;
+#if HAS_FUSED_OPS
+ FUSED_OPS_BLOCK_CI;
+ blockC00[i] = FUSED_OPS_RESULT_BLOCK_CI;
+#endif
BLOCK_WRITE((__global BLOCK_DATA_T *)(&(src_write0)[i * IC_BLOCK]),
AS_BLOCK_DATA_T(blockC00[i]));
}
uint f_pad_before, uint f_pad_after,
uint y_pad_before, uint y_pad_after,
uint x_pad_before, uint x_pad_after, uint alignment) {
- const uint fs = f / alignment;
- const uint fsv = f % alignment;
+ const uint feature = f + f_pad_before;
+ const uint fs = feature / alignment;
+ const uint fsv = feature % alignment;
const uint x_pitch = alignment;
const uint y_pitch = x_pitch * (x_pad_before + x_size + x_pad_after);
const uint total_f_size = f_pad_before + f_size + f_pad_after;
const uint fs_pitch = y_pitch * (y_pad_before + y_size + y_pad_after);
const uint b_pitch = fs_pitch * ((total_f_size + alignment - 1) / alignment);
- const uint fs_pad_before = f_pad_before / alignment;
-
const uint output_offset = b * b_pitch +
- (fs + fs_pad_before) * fs_pitch +
+ fs * fs_pitch +
(y_pad_before + y) * y_pitch +
(x_pad_before + x) * x_pitch
+ fsv;
uint f_pad_before, uint f_pad_after,
uint y_pad_before, uint y_pad_after,
uint x_pad_before, uint x_pad_after, uint alignment) {
- const uint f_mod = f % f_size;
+ const uint f_mod = f_pad_before + (f % f_size);
const uint fs = f_mod / alignment;
const uint fsv = f_mod % alignment;
const uint x_pitch = alignment;
const uint fs_pitch = y_pitch * (y_pad_before + y_size + y_pad_after);
const uint b_pitch = fs_pitch * ((total_f_size + alignment - 1) / alignment);
- const uint fs_pad_before = f_pad_before / alignment;
-
const uint output_offset = b * b_pitch +
- (fs_pad_before + fs) * fs_pitch +
+ fs * fs_pitch +
(y_pad_before + (y % y_size)) * y_pitch +
(x_pad_before + (x % x_size)) * x_pitch
+ fsv;
uint x_pad_before, uint x_pad_after,
uint alignment)
{
- const uint fs = f / alignment;
- const uint fsv = f % alignment;
+ const uint feature = f + f_pad_before;
+ const uint fs = feature / alignment;
+ const uint fsv = feature % alignment;
const uint x_pitch = alignment;
const uint y_pitch = x_pitch * (x_pad_before + x_size + x_pad_after);
const uint z_pitch = y_pitch * (y_pad_before + y_size + y_pad_after);
const uint total_f_size = f_pad_before + f_size + f_pad_after;
const uint b_pitch = fs_pitch * ((total_f_size + alignment - 1) / alignment);
- const uint fs_pad_before = f_pad_before / alignment;
-
const uint output_offset = b * b_pitch +
- (fs_pad_before + fs) * fs_pitch +
+ fs * fs_pitch +
(z_pad_before + z) * z_pitch +
(y_pad_before + y) * y_pitch +
(x_pad_before + x) * x_pitch
uint y_pad_before, uint y_pad_after,
uint x_pad_before, uint x_pad_after,
uint alignment) {
- const uint f_mod = f % f_size;
+ const uint f_mod = f_pad_before + (f % f_size);
const uint fs = f_mod / alignment;
const uint fsv = f_mod % alignment;
const uint x_pitch = alignment;
const uint total_f_size = f_pad_before + f_size + f_pad_after;
const uint b_pitch = fs_pitch * ((total_f_size + alignment - 1) / alignment);
- const uint fs_pad_before = f_pad_before / alignment;
-
const uint output_offset = b * b_pitch +
- (fs_pad_before + fs) * fs_pitch +
+ fs * fs_pitch +
(z_pad_before + (z % z_size)) * z_pitch +
(y_pad_before + (y % y_size)) * y_pitch +
(x_pad_before + (x % x_size)) * x_pitch
uint y_pad_before, uint y_pad_after,
uint x_pad_before, uint x_pad_after, uint alignmentF, uint alignmentB) {
const uint b_mod = b % b_size;
- const uint f_mod = f % f_size;
+ const uint f_mod = f_pad_before + (f % f_size);
const uint fs = f_mod / alignmentF;
const uint fsv = f_mod % alignmentF;
const uint bs = b_mod / alignmentB;
const uint fs_pitch = z_pitch * (z_pad_before + z_size + z_pad_after);
const uint b_pitch = fs_pitch * ((total_f_size + alignmentF - 1) / alignmentF);
- const uint fs_pad_before = f_pad_before / alignmentF;
-
const uint output_offset = (bs * b_pitch) + (bsv * alignmentF) +
- (fs_pad_before + fs) * fs_pitch +
+ fs * fs_pitch +
(z_pad_before + (z % z_size)) * z_pitch +
(y_pad_before + (y % y_size)) * y_pitch +
(x_pad_before + (x % x_size)) * x_pitch
uint y_pad_before, uint y_pad_after,
uint x_pad_before, uint x_pad_after) {
const uint alignment = 16;
- const uint fs = f / alignment;
- const uint fsv = f % alignment;
+ const uint feature = f + f_pad_before;
+ const uint fs = feature / alignment;
+ const uint fsv = feature % alignment;
const uint bs = b / alignment;
const uint bsv = b % alignment;
const uint total_f_size = f_pad_before + f_size + f_pad_after;
const uint bs_pitch = fs_pitch * ((total_f_size + alignment - 1) / alignment);
- const uint fs_pad_before = f_pad_before / alignment;
-
const uint output_offset = bs * bs_pitch +
- (fs_pad_before + fs) * fs_pitch +
+ fs * fs_pitch +
(z_pad_before + z) * z_pitch +
(y_pad_before + y) * y_pitch +
(x_pad_before + x) * x_pitch +
((o) / (sub_group_size))*CAT(prefix, _OFM_PITCH) \
)
+inline uint FUNC(get_g_os_zyx_is_osv_isv_index)(uint g, uint o, uint i, uint z, uint y, uint x,
+ uint g_size, uint o_size, uint i_size, uint z_size, uint y_size, uint x_size,
+ uint osv, uint isv) {
+ uint is_size = (i_size + isv - 1) / isv;
+ uint os_size = (o_size + osv - 1) / osv;
+
+ uint isv_index = i % isv;
+ uint osv_index = o % osv;
+ uint is_index = i / isv;
+ uint os_index = o / osv;
+
+ uint isv_pitch = 1;
+ uint osv_pitch = isv_pitch * isv;
+ uint is_pitch = osv_pitch * osv;
+ uint x_pitch = is_pitch * is_size;
+ uint y_pitch = x_pitch * x_size;
+ uint z_pitch = y_pitch * y_size;
+ uint os_pitch = z_pitch * z_size;
+ uint g_pitch = os_pitch * os_size;
+
+ uint index = 0;
+ index += isv_index * isv_pitch;
+ index += osv_index * osv_pitch;
+ index += is_index * is_pitch;
+ index += x * x_pitch;
+ index += y * y_pitch;
+ index += z * z_pitch;
+ index += os_index * os_pitch;
+ index += g * g_pitch;
+ return index;
+}
+
+#define GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, osv, isv) \
+ FUNC_CALL(get_g_os_zyx_is_osv_isv_index)( \
+ g, o, i, z, y, x, \
+ CAT(tensor, _GROUPS_NUM), \
+ CAT(tensor, _OFM_NUM), \
+ CAT(tensor, _IFM_NUM), \
+ CAT(tensor, _SIZE_Z), \
+ CAT(tensor, _SIZE_Y), \
+ CAT(tensor, _SIZE_X), \
+ osv, isv)
+
+#define GET_FILTER_G_OS_ZYX_IS_OSV16_ISV4_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 16, 4)
+#define GET_FILTER_G_OS_ZYX_IS_OSV16_ISV16_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 16, 16)
+#define GET_FILTER_G_OS_ZYX_IS_OSV16_ISV32_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 16, 32)
+#define GET_FILTER_G_OS_ZYX_IS_OSV32_ISV4_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 32, 4)
+#define GET_FILTER_G_OS_ZYX_IS_OSV32_ISV16_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 32, 16)
+#define GET_FILTER_G_OS_ZYX_IS_OSV32_ISV32_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 32, 32)
+
#define DECLARE_SAMPLER const sampler_t imageSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST
#if FP16_UNIT_USED
return (uint8)(0, dst_b, dst_f, 0, 0, dst_y, dst_x, 0);
}
-inline uint8 FUNC(reshape_grouped_to_simple)(uint g, uint o, uint i, uint z, uint y, uint x, uint src_size_groups, uint dst_size_ofm)
+inline uint8 FUNC(reshape_grouped)(uint g, uint o, uint i, uint z, uint y, uint x, uint src_size_ofm, uint dst_size_ofm)
{
- const uint ofm_per_group = dst_size_ofm / src_size_groups;
- const uint dst_ofm = g * ofm_per_group + (o % ofm_per_group);
+ const uint flat_ofm = g * src_size_ofm + o;
+ const uint dst_ofm = flat_ofm % dst_size_ofm;
+ const uint dst_g = flat_ofm / dst_size_ofm;
const uint dst_ifm = i;
const uint dst_z = z;
const uint dst_y = y;
const uint dst_x = x;
- return (uint8)(0, dst_ofm, dst_ifm, 0, dst_z, dst_y, dst_x, 0);
+ return (uint8)(dst_g, dst_ofm, dst_ifm, 0, dst_z, dst_y, dst_x, 0);
}
inline uint8 FUNC(reshape_dims)(
{
if (src_dims == 5 && dst_dims == 4) // goiyx -> oiyx
{
- return FUNC_CALL(reshape_grouped_to_simple)(g, o, i, 0, y, x, src_size_groups, dst_size_ofm);
+ return FUNC_CALL(reshape_grouped)(g, o, i, 0, y, x, src_size_ofm, dst_size_ofm);
}
- else if (src_dims == 6 && dst_dims == 5) // goizyx -> oizyx
+ else if (src_dims == 6 && dst_dims == 5) // goizyx -> oizyx or goizyx -> goiyx
{
- return FUNC_CALL(reshape_grouped_to_simple)(g, o, i, z, y, x, src_size_groups, dst_size_ofm);
+ return FUNC_CALL(reshape_grouped)(g, o, i, z, y, x, src_size_ofm, dst_size_ofm);
+ }
+ else if (src_dims == 6 && dst_dims == 4) // goizyx -> oiyx
+ {
+ return FUNC_CALL(reshape_grouped)(g, o, i, 0, y, x, src_size_ofm, dst_size_ofm);
}
return (uint8)(g, o, i, w, z, y, x, 0);
return GET_FILTER_GS_OI_YXS_GSV16_YXSV4_INDEX(INPUT0, g, o, i, y, x);
#elif defined INPUT0_LAYOUT_GS_OI_YXS_GSV32_YXSV4
return GET_FILTER_GS_OI_YXS_GSV32_YXSV4_INDEX(INPUT0, g, o, i, y, x);
+#elif defined INPUT0_LAYOUT_G_OS_ZYX_IS_OSV16_ISV4
+ return GET_FILTER_G_OS_ZYX_IS_OSV16_ISV4_INDEX(INPUT0, g, o, i, z, y, x);
+#elif defined INPUT0_LAYOUT_G_OS_ZYX_IS_OSV16_ISV16
+ return GET_FILTER_G_OS_ZYX_IS_OSV16_ISV16_INDEX(INPUT0, g, o, i, z, y, x);
+#elif defined INPUT0_LAYOUT_G_OS_ZYX_IS_OSV16_ISV32
+ return GET_FILTER_G_OS_ZYX_IS_OSV16_ISV32_INDEX(INPUT0, g, o, i, z, y, x);
+#elif defined INPUT0_LAYOUT_G_OS_ZYX_IS_OSV32_ISV4
+ return GET_FILTER_G_OS_ZYX_IS_OSV32_ISV4_INDEX(INPUT0, g, o, i, z, y, x);
+#elif defined INPUT0_LAYOUT_G_OS_ZYX_IS_OSV32_ISV16
+ return GET_FILTER_G_OS_ZYX_IS_OSV32_ISV16_INDEX(INPUT0, g, o, i, z, y, x);
+#elif defined INPUT0_LAYOUT_G_OS_ZYX_IS_OSV32_ISV32
+ return GET_FILTER_G_OS_ZYX_IS_OSV32_ISV32_INDEX(INPUT0, g, o, i, z, y, x);
#else
#error reorder_weights.cl: input format - not supported
#endif
return GET_FILTER_GS_OI_YXS_GSV32_YXSV4_INDEX(OUTPUT, g, o, i, y, x);
#elif defined OUTPUT_LAYOUT_G_OS_IS_YX_OSV16_ISV4
return GET_FILTER_G_OS_IS_YX_OSV16_ISV4_INDEX(OUTPUT, g, o, i, y, x);
+#elif defined OUTPUT_LAYOUT_G_OS_ZYX_IS_OSV16_ISV4
+ return GET_FILTER_G_OS_ZYX_IS_OSV16_ISV4_INDEX(OUTPUT, g, o, i, z, y, x);
+#elif defined OUTPUT_LAYOUT_G_OS_ZYX_IS_OSV16_ISV16
+ return GET_FILTER_G_OS_ZYX_IS_OSV16_ISV16_INDEX(OUTPUT, g, o, i, z, y, x);
+#elif defined OUTPUT_LAYOUT_G_OS_ZYX_IS_OSV16_ISV32
+ return GET_FILTER_G_OS_ZYX_IS_OSV16_ISV32_INDEX(OUTPUT, g, o, i, z, y, x);
+#elif defined OUTPUT_LAYOUT_G_OS_ZYX_IS_OSV32_ISV4
+ return GET_FILTER_G_OS_ZYX_IS_OSV32_ISV4_INDEX(OUTPUT, g, o, i, z, y, x);
+#elif defined OUTPUT_LAYOUT_G_OS_ZYX_IS_OSV32_ISV16
+ return GET_FILTER_G_OS_ZYX_IS_OSV32_ISV16_INDEX(OUTPUT, g, o, i, z, y, x);
+#elif defined OUTPUT_LAYOUT_G_OS_ZYX_IS_OSV32_ISV32
+ return GET_FILTER_G_OS_ZYX_IS_OSV32_ISV32_INDEX(OUTPUT, g, o, i, z, y, x);
#else
#error reorder_weights.cl: output format - not supported
#endif
#if OUTPUT_GROUPS_NUM > 1
const unsigned g = (uint)get_global_id(0) / OUTPUT_OFM_NUM;
const unsigned o = (uint)get_global_id(0) % OUTPUT_OFM_NUM;
- const unsigned i = (uint)get_global_id(1);
-#if OUTPUT_DIMS == 5
- const unsigned z = 0;
- const unsigned y = (uint)get_global_id(2) / OUTPUT_SIZE_X;
- const unsigned x = (uint)get_global_id(2) % OUTPUT_SIZE_X;
-#elif OUTPUT_DIMS == 6
- const unsigned zyx = get_global_id(2);
- const unsigned x = zyx % INPUT0_SIZE_X;
- const unsigned y = (zyx / INPUT0_SIZE_X) % INPUT0_SIZE_Y;
- const unsigned z = (zyx / INPUT0_SIZE_X) / INPUT0_SIZE_Y;
-#endif
#else
+ const unsigned g = 0;
const unsigned o = (uint)get_global_id(0);
+#endif
+
const unsigned i = (uint)get_global_id(1);
- const unsigned g = 0;
-#if OUTPUT_DIMS == 2
- const unsigned z = 0;
- const unsigned y = 0;
+
+#if OUTPUT_DIMS == 2 || (OUTPUT_DIMS == 3 && OUTPUT_GROUPED)
const unsigned x = 0;
-#elif OUTPUT_DIMS == 4
+ const unsigned y = 0;
+ const unsigned z = 0;
+#elif OUTPUT_DIMS == 4 || (OUTPUT_DIMS == 5 && OUTPUT_GROUPED)
+ const unsigned x = (uint)get_global_id(2) % OUTPUT_SIZE_X;
+ const unsigned y = (uint)get_global_id(2) / OUTPUT_SIZE_X;
const unsigned z = 0;
- const unsigned y = (uint)get_global_id(2) / INPUT0_SIZE_X;
- const unsigned x = (uint)get_global_id(2) % INPUT0_SIZE_X;
-#elif OUTPUT_DIMS == 5
+#elif OUTPUT_DIMS == 5 || (OUTPUT_DIMS == 6 && OUTPUT_GROUPED)
const unsigned zyx = get_global_id(2);
- const unsigned x = zyx % INPUT0_SIZE_X;
- const unsigned y = (zyx / INPUT0_SIZE_X) % INPUT0_SIZE_Y;
- const unsigned z = (zyx / INPUT0_SIZE_X) / INPUT0_SIZE_Y;
-#endif
+ const unsigned x = zyx % OUTPUT_SIZE_X;
+ const unsigned y = (zyx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
+ const unsigned z = (zyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y;
#endif
#if OUTPUT_GROUPS_NUM > 1 // Add grouped macro instead this check
#else
uint8 ir = RESHAPE_WEIGHT_DIMS(OUTPUT, INPUT0, o, i, 0, z, y, x);
#endif
- output[FUNC_CALL(get_output_index)(g, o, i, z, y, x)] = TO_OUTPUT_TYPE(input[FUNC_CALL(get_input_index)(ir[0],ir[1],ir[2],ir[4],ir[5],ir[6])]);
+
+ uint input_idx = FUNC_CALL(get_input_index)(ir[0],ir[1],ir[2],ir[4],ir[5],ir[6]);
+#if !REORDER_ROTATE
+ uint output_idx = FUNC_CALL(get_output_index)(g, o, i, z, y, x);
+#else
+ uint output_idx = FUNC_CALL(get_output_index)(g, o, i, OUTPUT_SIZE_Z - z - 1, OUTPUT_SIZE_Y - y - 1, OUTPUT_SIZE_X - x - 1);
+#endif
+
+ output[output_idx] = TO_OUTPUT_TYPE(input[input_idx]);
}
#endif
definitions.push_back({ safe_index_func_name, safe_index_func_val });
definitions.push_back({ index_func_name, index_func_val });
} else {
- definitions.push_back({ safe_index_func_name, "f" });
- definitions.push_back({ index_func_name, "f" });
+ definitions.push_back({ safe_index_func_name, "(f)" });
+ definitions.push_back({ index_func_name, "(f)" });
}
} else {
definitions.push_back({ safe_index_func_name, safe_index_func_val });
auto in_lo = p->per_tensor_input_range ? Broadcast(std::to_string(p->in_lo), desc.tensors[0].GetDType(), vec_size) : GetInputVarName(0);
auto in_hi = p->per_tensor_input_range ? Broadcast(std::to_string(p->in_hi), desc.tensors[0].GetDType(), vec_size) : GetInputVarName(1);
- op_decls += "\\\n\t" + tmp_type + " " + tmp_var + " = min(max(" + in_lo + ", " + in_converted + "), " + in_hi + ");";
+ if (p->has_clamp) {
+ op_decls += "\\\n\t" + tmp_type + " " + tmp_var + " = min(max(" + in_lo + ", " + in_converted + "), " + in_hi + ");";
+ } else {
+ op_decls += "\\\n\t" + tmp_type + " " + tmp_var + " = " + in_converted + ";";
+ }
op_decls += "\\\n\t" + tmp_var + " = " + tmp_var + "*" + pre_scale + ";";
if (p->has_pre_shift)
op_decls += "\\\n\t" + tmp_var + " = " + tmp_var + " + " + pre_shift + ";";
+
op_decls += "\\\n\t" + tmp_var + " = round(" + tmp_var + ");";
bool need_round = (p->has_post_scale || p->has_post_shift) &&
}
if (should_be_safe) {
- return GetInputTensorName(input_id) + "_GET_INDEX_SAFE(" + idx_order +")";
+ return GetInputTensorName(input_id) + "_GET_INDEX_SAFE(" + idx_order + ")";
} else {
- return GetInputTensorName(input_id) + "_GET_INDEX(" + idx_order +")";
+ return GetInputTensorName(input_id) + "_GET_INDEX(" + idx_order + ")";
}
}
}
std::string FusedOpsCodeGenerator::GetOutputVarName(std::string input_var) const {
- static int i = 0;
std::replace(input_var.begin(), input_var.end(), '[', '_');
std::replace(input_var.begin(), input_var.end(), ']', '_');
std::replace(input_var.begin(), input_var.end(), ' ', '_');
- return input_var + "_" + std::to_string(i++);
+ return input_var + "_out";
}
std::string FusedOpsCodeGenerator::GetType(Datatype dt, size_t vec_size) const {
auto p = std::dynamic_pointer_cast<quantize_fuse_params>(desc.op_params);
if (p) {
std::vector<size_t> res = {};
- if (!p->per_tensor_input_range) {
+ if (!p->per_tensor_input_range && p->has_clamp) {
res.push_back(0);
res.push_back(1);
}
std::string y;
std::string x;
size_t dims;
- explicit idx_desc(std::vector<std::string> idx, DataTensor t) : b("0"), f("0"), z("0"), y("0"), x("0"), dims(0) {
+ explicit idx_desc(std::vector<std::string> idx, DataTensor t)
+ : b("0"), f("0"), z("0"), y("0"), x("0"), dims(0) {
dims = idx.size();
switch (dims) {
case 1: f = idx[0]; break;
static WeightsFormatSupportType CheckWeights(const weight_bias_params& newParams,
WeightsType reqType,
WeightsLayout reqLayouts,
- const ParamsKey& paramsKey) {
+ const ParamsKey& paramsKey,
+ bool rotate) {
// validate if weights type is image and if device supports requested sizes
if (Tensor::IsImageType(reqLayouts)) {
if (!CheckImageSize(newParams, reqLayouts))
}
reorderNeeded |= tensor.GetLayout() != reqLayouts;
+ reorderNeeded |= rotate;
- if (reorderNeeded && !pitchesDifferFromLS) {
+ if (reorderNeeded && !pitchesDifferFromLS && !rotate) {
reorderNeeded = !((reqLayouts == WeightsLayout::io && tensor.GetLayout() == WeightsLayout::iyxo) ||
(reqLayouts == WeightsLayout::oi && tensor.GetLayout() == WeightsLayout::oiyx));
}
WeightsLayout reqLayout,
WeightsReorderParams& weightsReorderParams,
const ParamsKey& paramsKey,
- size_t groups) {
+ size_t groups,
+ bool rotate) {
const auto& optParams = static_cast<const weight_bias_optional_params&>(options);
const auto inType = DataTypeToWeightsType(newParams.inputs[0].GetDType());
const auto dtype = paramsKey.isEnabledDifferentInputWeightsTypes() ? newParams.weights.GetDType() : inType;
- switch (CheckWeights(newParams, inType, reqLayout, paramsKey)) {
+ switch (CheckWeights(newParams, inType, reqLayout, paramsKey, rotate)) {
case SUPPORTED:
return true;
case UNSUPPORTED:
r_params.layerID = newParams.layerID + "_reorder_";
r_params.input = newParams.weights;
r_params.output = newParams.weights.TransformIgnorePadding(reqLayout, dtype, groups, false);
+ r_params.rotate_180 = rotate;
r_params.engineInfo = newParams.engineInfo;
reorder_optional_params op;
if (kernels_data.empty()) {
throw std::runtime_error("No suitable kernel found for weights reorder from " +
toString(r_params.input.GetLayout()) + " to " +
- toString(r_params.output.GetLayout()));
+ toString(r_params.output.GetLayout()) +
+ (rotate ? " with rotate" : ""));
}
weightsReorderParams.engine = WeightsReorderParams::Engine::GPU;
if (params.inputs.size()) {
no_pitch_same_dims = !params.inputs[0].PitchesDifferFromLogicalDims();
- if (params.inputs[0].GetLayout() == DataLayout::b_fs_yx_fsv16 && params.inputs[0].Feature().v % 16 != 0)
+ if ((params.inputs[0].GetLayout() == DataLayout::b_fs_yx_fsv16 && params.inputs[0].Feature().v % 16 != 0) ||
+ (params.inputs[0].GetLayout() == DataLayout::b_fs_zyx_fsv16 && params.inputs[0].Feature().v % 16 != 0))
return false;
for (size_t i = 1; i < params.inputs.size(); i++) {
no_pitch_same_dims = no_pitch_same_dims && (params.inputs[0] == params.inputs[i]);
- if (params.inputs[i].GetLayout() == DataLayout::b_fs_yx_fsv16 && params.inputs[i].Feature().v % 16 != 0)
+ if ((params.inputs[i].GetLayout() == DataLayout::b_fs_yx_fsv16 && params.inputs[i].Feature().v % 16 != 0) ||
+ (params.inputs[0].GetLayout() == DataLayout::b_fs_zyx_fsv16 && params.inputs[0].Feature().v % 16 != 0))
return false;
}
WeightsLayout layout,
WeightsReorderParams& weightsReorderParams,
const ParamsKey& paramsKey = ParamsKey(),
- size_t groups = 1);
+ size_t groups = 1,
+ bool rotate = false);
JitConstants GetTensorFriendlyWorkGroupsJit(const DataTensor& t);
std::vector<size_t> GetTensorFriendlyWorkGroups(const DataTensor& t);
std::vector<size_t> GetOptimalLocalWorkGroupSizes(std::vector<size_t> gws, const EngineInfo& info);
jit.AddConstant(MakeJitConstant("INPUT" + toCodeString(i), params.inputs[i]));
}
+#if !NDEBUG
jit.AddConstant(MakeJitConstant("LayerID", params.layerID));
-
+#endif
return jit;
}
virtual const std::string GetName() const { return kernelName; }
static const primitive_db& get_db() { return db; }
+ static void ResetCounter() { counter = 0; }
protected:
static const primitive_db db;
case WeightsLayout::gs_oi_yxs_gsv32_yxsv4: return "GS_OI_YXS_GSV32_YXSV4";
case WeightsLayout::g_os_is_yx_isv16_osv16: return "G_OS_IS_YX_ISV16_OSV16";
case WeightsLayout::g_os_is_yx_osv16_isv4: return "G_OS_IS_YX_OSV16_ISV4";
+ case WeightsLayout::g_os_zyx_is_osv16_isv4: return "G_OS_ZYX_IS_OSV16_ISV4";
+ case WeightsLayout::g_os_zyx_is_osv16_isv16: return "G_OS_ZYX_IS_OSV16_ISV16";
+ case WeightsLayout::g_os_zyx_is_osv16_isv32: return "G_OS_ZYX_IS_OSV16_ISV32";
+ case WeightsLayout::g_os_zyx_is_osv32_isv4: return "G_OS_ZYX_IS_OSV32_ISV4";
+ case WeightsLayout::g_os_zyx_is_osv32_isv16: return "G_OS_ZYX_IS_OSV32_ISV16";
+ case WeightsLayout::g_os_zyx_is_osv32_isv32: return "G_OS_ZYX_IS_OSV32_ISV32";
default: throw std::invalid_argument("Failed to convert WeightsLayout " + std::to_string(layout) + " to string");
}
}
uint32_t split : 1;
uint32_t dilation : 1;
uint32_t depthwise_separable_opt : 1;
- uint32_t transposed : 1;
uint32_t local : 1;
uint32_t grouped : 1;
uint32_t deformable : 1;
} resample;
struct reorder_t {
uint32_t winograd : 1;
+ uint32_t rotate : 1;
} reorder;
struct eltwise_t {
uint32_t stride : 1;
void EnableDepthwiseSeparableOpt() { key.restrict.val.dedicated.conv.depthwise_separable_opt = 1; }
void EnableLocalConvolution() { key.restrict.val.dedicated.conv.local = 1; }
void EnableGroupedConvolution() { key.restrict.val.dedicated.conv.grouped = 1; }
- void EnableTranspose() { key.restrict.val.dedicated.conv.transposed = 1; }
void EnableInt8Quantization() { key.restrict.val.quantization = 1; }
void EnableOutputCalibration() { key.restrict.val.output_calibration = 1; }
void EnableDeformableMode() { key.restrict.val.dedicated.conv.deformable = 1; }
void EnableQuantizeScaleShiftOpt() { key.restrict.val.dedicated.quantize.scale_shift_opt = 1; }
void EnableWinogradReorder() { key.restrict.val.dedicated.reorder.winograd = 1; }
+ void EnableRotateReorder() { key.restrict.val.dedicated.reorder.rotate = 1; }
void EnableSoftmaxDim(SoftmaxDim d);
void EnableConcatAxis(ConcatAxis a);
void EnableReampleType(ResampleType a);
CLDNN_ERROR_MESSAGE(node.id(), "Requested activation is not supported for integer type.");
}
+ if (node.has_fused_primitives()) {
+ input_node_layout.data_type = node.get_fused_output_layout().data_type;
+ }
+
return input_node_layout;
}
auto input_layout = node.input().get_output_layout();
auto weights_layout = node.weights(0).get_output_layout(); // weights are stored after inputs
+ auto data_type = input_layout.data_type;
+ if ((input_layout.data_type == data_types::i8 || input_layout.data_type == data_types::u8) && !node.has_fused_primitives()) {
+ data_type = data_types::f32;
+ }
+
+ if (node.has_fused_primitives()) {
+ data_type = node.get_fused_output_layout().data_type;
+ }
+
auto input_offset = desc->input_offset;
auto strd = desc->stride;
auto group = desc->groups;
desc->output_size.spatial[0],
desc->output_size.spatial[1],
desc->output_size.spatial[2]);
- return {input_layout.data_type, input_layout.format, output_size};
+ return {data_type, input_layout.format, output_size};
}
// compute output_dim <= stride * (input_size - 1) + kernel_size + 2 * input_offset;
tensor output_size(input_layout.size.batch[0],
number_of_features, x, y, z);
- return {input_layout.data_type, input_layout.format, output_size};
+ return {data_type, input_layout.format, output_size};
}
std::string deconvolution_inst::to_string(deconvolution_node const& node) {
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw);
+ implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
+ implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
+
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bs_fs_zyx_bsv16_fsv16), val_fw);
protected:
// TODO: share it with convolution and fully connected
- bool validate_impl(const typed_primitive_inst<deconvolution>& instance) const override {
+ bool validate_impl(const typed_primitive_inst<deconvolution>&) const override {
bool res = true;
CLDNN_ERROR_NOT_EQUAL(_outer.id(),
"padding mode",
0.0f,
"Unknown padding mode in deconvolution.");
- // Check whether all memory elements use the same unit type (FP16 or FP32).
- auto input_count = instance.inputs_memory_count();
- auto input_data_type = 0 == input_count ?
- instance.node.input().get_output_layout().data_type :
- instance.input_memory().get_layout().data_type;
- CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(),
- "Input memory",
- input_data_type,
- "output memory",
- instance.output_memory().get_layout().data_type,
- "");
- CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(),
- "Input memory",
- input_data_type,
- "filter memory",
- instance.weights_memory(0).get_layout().data_type,
- "");
return res;
}
kernel::kernel_arguments_data get_arguments(typed_primitive_inst<deconvolution>& instance,
int32_t split) const override {
kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
- auto* desc = static_cast<const deconvolution*>(instance.desc().get());
- int dep_size = static_cast<int>((desc->weights.size() + desc->bias.size() + 1));
args.weights = (memory_impl::cptr) &instance.weights_memory(split);
args.bias = (memory_impl::cptr) (instance.bias_term() ? &instance.bias_memory(split) : nullptr);
- if (static_cast<int>(instance.dependencies().size()) > dep_size)
- args.inputs.emplace_back(&instance.dep_memory(dep_size));
-
return args;
}
const auto& primitive = arg.get_primitive();
const auto& weights_layout = arg.weights(0).get_output_layout();
- switch (weights_layout.fused_format()) {
- // FP32 (float)
- case fuse(data_types::f32, format::goiyx):
- case fuse(data_types::f32, format::yxio):
- case fuse(data_types::f32, format::gyxio):
- case fuse(data_types::f32, format::goizyx):
- case fuse(data_types::f16, format::goiyx):
- case fuse(data_types::f16, format::yxio):
- case fuse(data_types::f16, format::gyxio):
- case fuse(data_types::f16, format::goizyx):
- case fuse(data_types::f32, format::oiyx):
- case fuse(data_types::f32, format::yxfb):
- case fuse(data_types::f32, format::oizyx):
- case fuse(data_types::f16, format::oiyx):
- case fuse(data_types::f16, format::yxfb):
- case fuse(data_types::f16, format::oizyx):
- break;
- default:
- throw std::runtime_error("deconvolution weights format unsupported");
- }
-
const auto& weights_size = weights_layout.size;
const auto& split = primitive->split();
deconv_params.gradient = primitive->gradient();
- if (arg.get_dependencies().size() > primitive->weights.size() + primitive->bias.size() + 1) {
- deconv_params.fused_eltwise = true;
- deconv_params.inputs.push_back(convert_data_tensor(arg.fused_sum().get_output_layout()));
- }
-
auto& kernel_selector = kernel_selector::deconvolution_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(deconv_params, deconv_optional_params);
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16),
deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16),
+ deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
+ deconvolution_gpu::create);
}
} // namespace detail
supports_subgroups_short = extensions.find("cl_intel_subgroups_short") != std::string::npos;
- supports_imad = true;
+ supports_imad = dev_name.find("Gen12") != std::string::npos;
supports_immad = false;
dev_type = static_cast<uint32_t>(device.getInfo<CL_DEVICE_TYPE>());
_outer(arg),
_device_info(arg.get_program().get_engine().get_context()->get_device_info()),
_kernel_data(kd) {
+ // weights reorder params got copied to parent, clear in _kernel_data to release shared ptr
+ _kernel_data.weightsReorderParams.engine = kernel_selector::generic_kernel_params::Engine::NONE;
+ _kernel_data.weightsReorderParams.cpuKernel = nullptr;
+ _kernel_data.weightsReorderParams.clKernel = nullptr;
+
_kernels.reserve(kd.kernels.size());
for (size_t i = 0; i < kd.kernels.size(); ++i) {
gpu::kernel kernel(_outer.get_program().get_engine().get_context(),
template<typename T>
void post_optimize_weights::optimize_weights(T& node, program_impl& p) {
auto offsets = get_weights_bias_offset(node);
+ auto* impl = node.get_selected_impl().get();
+ auto output_layout = node.get_output_layout();
+ auto& weights_reorder_params = impl->_weights_reorder_params;
+
for (auto i = offsets.weights_offset; i < offsets.bias_offset; i++) {
auto& weights_node = node.get_dependency(i);
- auto* impl = node.get_selected_impl().get();
- auto output_layout = node.get_output_layout();
auto weights_layout = weights_node.get_output_layout();
- auto reorders = _rf.get_weights_reorder(weights_node.id(), weights_layout, impl->_weights_reorder_params);
+ auto reorders = _rf.get_weights_reorder(weights_node.id(), weights_layout, weights_reorder_params);
for (auto& reorder : reorders) {
// insert new generic_layer node to topology
g_node.get_output_layout(false);
g_node.selected_impl = g_node.type()->choose_impl(p.get_engine(), g_node);
}
- // set the old output layout and do not invalidate users as change of weights will not affect output layout
- node.set_output_layout(output_layout, false);
}
+
+ // Reset weights reorder params to not keep source code pointer
+ weights_reorder_params.engine = kernel_selector::generic_kernel_params::Engine::NONE;
+ weights_reorder_params.clKernel = nullptr;
+ weights_reorder_params.cpuKernel = nullptr;
+
+ // set the old output layout and do not invalidate users as change of weights will not affect output layout
+ node.set_output_layout(output_layout, false);
}
void post_optimize_weights::run(program_impl& p) {
if (!p.get_options().get<build_option_type::optimize_data>()->enabled())
continue;
+ auto& deconv_node = node->as<deconvolution>();
+ auto& weights_node = deconv_node.weights();
auto deconv_prim = node->as<deconvolution>().typed_desc();
- tensor filter_size = { 1, 1, 1, 1, 1 };
+ tensor filter_size = weights_node.get_output_layout().size;
auto weights = deconv_prim->weights;
std::vector<primitive_id> weights_vec;
for (auto& weights_id : weights_vec) {
auto weights_iter = p.nodes_map.find(weights_id);
if (weights_iter == p.nodes_map.end()) continue;
-
- auto weights_node_ptr = weights_iter->second;
- // get filter spatial sizes for input offset adjustment, perform this only once as all filters should
- // have same size
- if (weights_id == weights_vec[0])
- filter_size = weights_node_ptr->get_output_layout().size;
}
// limit optimization to stride = 1
- if (deconv_prim->stride.spatial[0] == 1 && deconv_prim->stride.spatial[1] == 1 && !deconv_prim->gradient()) {
+ bool unit_stride = std::all_of(deconv_prim->stride.spatial.begin(),
+ deconv_prim->stride.spatial.end(),
+ [](tensor::value_type v) { return v == 1; });
+ if (unit_stride && !deconv_prim->gradient()) {
primitive_id deconv_id = node->id();
auto& input_node = node->get_dependency(0);
-
- // disable for 5D
- if (cldnn::format::dimension(input_node.get_output_layout().format) == 5)
+ auto groups = deconv_node.get_groups();
+
+ bool perform_opt = false;
+ // fp16 and fp32 bfyx implementation supports transposed convolution
+ perform_opt |= cldnn::format::dimension(input_node.get_output_layout().format) == 4 &&
+ (input_node.get_output_layout().data_type == data_types::f32 || input_node.get_output_layout().data_type == data_types::f16) &&
+ !((_lo.get_optimization_attributes().b_fs_yx_fsv16_network || input_node.get_output_layout().format == format::b_fs_yx_fsv16) &&
+ _lo.is_format_optimized(node->as<deconvolution>(), format::b_fs_yx_fsv16));
+ // int8/uint8 input
+ perform_opt |= (input_node.get_output_layout().data_type == data_types::i8 || input_node.get_output_layout().data_type == data_types::u8) &&
+ // imad convolution kernel limitation for groups
+ (groups == 1 || weights_node.get_output_layout().size.feature[0] % 4 == 0 ||
+ groups == static_cast<uint32_t>(input_node.get_output_layout().size.feature[0])) &&
+ // no uint8/int8 3D convolution support
+ input_node.get_output_layout().format.dimension() == 4;
+
+ if (!perform_opt)
continue;
- // Disable for blocked formats
- if ((_lo.get_optimization_attributes().b_fs_yx_fsv16_network || input_node.get_output_layout().format == format::b_fs_yx_fsv16) &&
- _lo.is_format_optimized(node->as<deconvolution>(), format::b_fs_yx_fsv16)) {
- continue;
- }
-
primitive_id input_id = deconv_prim->input[0];
// setting convolution parameters based on deconvolution params
}
}
auto rename_id = deconv_id + "_tmp";
+ auto was_output = node->is_output();
+ if (was_output) {
+ node->set_output(false);
+ auto& outputs = p.get_outputs();
+ outputs.erase(std::remove(outputs.begin(), outputs.end(), node.get()), outputs.end());
+ }
p.rename(*node, rename_id);
// create convolution primitive
if (biases.size() != 0) {
auto conv_prim = std::make_shared<convolution>(deconv_id,
- input_id,
- weights_vec,
- bias_vec,
- stride,
- input_offset,
- tensor{ 1, 1, 1, 1 },
- output_padding);
+ input_id,
+ weights_vec,
+ bias_vec,
+ groups,
+ stride,
+ input_offset,
+ tensor{ 1, 1, 1, 1 },
+ output_padding);
p.get_or_create(conv_prim);
} else {
auto conv_prim = std::make_shared<convolution>(deconv_id,
- input_id,
- weights_vec,
- stride,
- input_offset,
- tensor{ 1, 1, 1, 1 },
- output_padding);
+ input_id,
+ weights_vec,
+ groups,
+ stride,
+ input_offset,
+ tensor{ 1, 1, 1, 1 },
+ output_padding);
p.get_or_create(conv_prim);
}
p.nodes_map.erase(rename_id);
}
- update_processing_order = true;
-
+ if (was_output) {
+ conv_node->set_output(true);
+ p.get_outputs().push_back(conv_node);
+ }
p.mark_if_data_flow(*conv_node);
conv_node->recalc_output_layout(true);
fuse_sigmoid_mul_to_swish(p);
fuse_simple_primitives(p);
fuse_activations(p);
- fuse_skip_layers(p);
+ optimize_fused_ops(p);
}
void prepare_primitive_fusing::fuse_sigmoid_mul_to_swish(program_impl &p) {
}
}
-void prepare_primitive_fusing::fuse_skip_layers(program_impl& p) {
- // This loop tries fusing eltwise (sum) with deconvolution
- auto itr = p.get_processing_order().begin();
- while (itr != p.get_processing_order().end()) {
- auto node_itr = itr++;
- auto& node = (*node_itr);
-
- program_helpers::do_for_types<eltwise>(*node, [&p](eltwise_node& node) {
- if (node.get_primitive()->mode != eltwise_mode::sum || node.inputs_count() != 2)
- return;
-
- // both inputs should be deconvolutions
- if (!(node.input(0).is_type<deconvolution>() && node.input(1).is_type<deconvolution>())) {
- return;
- }
-
- auto& to_fuse_with = node.input(0);
- int to_fuse_index = 1;
-
- // remove dependencies and users of elwtise that is going to be extracted
- p.add_connection(node.input(to_fuse_index), to_fuse_with);
- p.remove_connection(node.input(to_fuse_index), node);
-
- p.get_processing_order().erase(&to_fuse_with);
- p.get_processing_order().insert(&node, &to_fuse_with);
-
- if (!node.get_fused_activations_funcs().empty()) {
- for (size_t i = 0; i < node.get_fused_activations_funcs().size(); i++) {
- to_fuse_with.add_fused_activation(node.get_fused_activations_funcs()[i],
- node.get_fused_activations_params()[i]);
- }
- }
- to_fuse_with.set_output_padding(node.get_output_layout().data_padding);
-
- p.extract_and_remove(node);
- });
- }
-}
-
void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
bool recalc_processing_order = false;
should_fuse |= input_data.is_type<mvn>();
+ should_fuse |= input_data.is_type<deconvolution>();
+
if (!should_fuse)
return;
should_fuse |= input_data.is_type<mvn>() && mvn_supports_fusings(input_data.as<mvn>());
+ should_fuse |= input_data.is_type<deconvolution>();
+
if (!should_fuse)
return;
should_fuse |= input_data.is_type<mvn>() && mvn_supports_fusings(input_data.as<mvn>()) &&
quantize_node.get_scale_shift_opt();
+ should_fuse |= input_data.is_type<activation>() && quantize_node.get_scale_shift_opt();
+
+ should_fuse |= input_data.is_type<deconvolution>() && quantize_node.get_scale_shift_opt() &&
+ // fp16/fp32 optimized kernels don't support chaning data type
+ (input_data.get_dependency(0).get_output_layout().data_type == data_types::u8 ||
+ input_data.get_dependency(0).get_output_layout().data_type == data_types::i8 ||
+ input_data.get_output_layout().data_type == out_layout.data_type);
+
if (!should_fuse)
return;
auto parent2 = parents[1];
bool can_fuse_parent1 = (parent1->is_type<convolution>() && conv_supports_fusings(parent1->as<convolution>())) ||
- (parent1->is_type<mvn>() && mvn_supports_fusings(parent1->as<mvn>()));
+ (parent1->is_type<mvn>() && mvn_supports_fusings(parent1->as<mvn>())) ||
+ (parent1->is_type<deconvolution>());
bool can_fuse_parent2 = (parent2->is_type<convolution>() && conv_supports_fusings(parent2->as<convolution>())) ||
- (parent2->is_type<mvn>() && mvn_supports_fusings(parent2->as<mvn>()));
+ (parent2->is_type<mvn>() && mvn_supports_fusings(parent2->as<mvn>())) ||
+ (parent2->is_type<deconvolution>());
std::vector<bool> can_fuse_parents = { can_fuse_parent1, can_fuse_parent2 };
p.get_processing_order().calc_processing_order(p);
}
+void prepare_primitive_fusing::optimize_fused_ops(program_impl& p) {
+ auto itr = p.get_processing_order().begin();
+ while (itr != p.get_processing_order().end()) {
+ auto node_itr = itr++;
+ auto& node = (*node_itr);
+
+ if (!node->has_fused_primitives())
+ continue;
+
+ // TODO: try more optimizations:
+ // 1. clamp optimization
+ // 2. fuse conv bias to quantize shift
+ auto& fused_prims = node->get_fused_primitives();
+
+ // Drop relu if the next fused op is quantize with u8 output and no in_shift
+ auto fp_itr = fused_prims.begin();
+ while (fp_itr != fused_prims.end()) {
+ auto curr_itr = fp_itr++;
+ if (fp_itr == fused_prims.end())
+ break;
+
+ auto& fp = *curr_itr;
+ auto& fp_next = *fp_itr;
+
+ if (fp.node->is_type<activation>() && fp_next.node->is_type<quantize>()) {
+ auto& activation_node = fp.node->as<activation>();
+ auto& quantize_node = fp_next.node->as<quantize>();
+ bool can_skip = activation_node.get_primitive()->activation_function == activation_func::relu &&
+ activation_node.get_primitive()->additional_params.a == 0.0f &&
+ fp.deps.empty() &&
+ (quantize_node.get_output_layout().data_type == data_types::u8 ||
+ quantize_node.get_output_layout().data_type == data_types::i8) &&
+ quantize_node.get_scale_shift_opt() &&
+ !quantize_node.get_need_pre_shift();
+
+ if (can_skip) {
+ fused_prims.erase(curr_itr);
+ }
+ }
+ }
+ }
+}
+
void prepare_conv_eltw_fusing::fuse_conv_depth_to_space(program_impl& p, program_node* node) {
// make sure this convolution have only 1 user and it's depth_to_space
// make sure convolution is not an output
return offset;
};
+ bool has_negative_scales = false;
bool need_post_scale = false;
bool need_post_shift = false;
bool need_pre_shift = false;
+ auto out_dt = quantize_node.get_output_layout().data_type;
+ bool need_clamp = levels != 256 || (out_dt != data_types::u8 && out_dt != data_types::i8);
bool per_tensor_in_scale = true;
bool per_tensor_in_shift = true;
bool per_tensor_in_range = true;
if (data_output_shift[s_offset] != 0.0f) {
need_post_shift = true;
}
+ if (data_input_scale[s_offset] < 0.0f) {
+ has_negative_scales = true;
+ }
}
}
}
if (half_to_float(data_output_shift[s_offset]) != 0.0f) {
need_post_shift = true;
}
+ if (half_to_float(data_input_scale[s_offset]) < 0.0f) {
+ has_negative_scales = true;
+ }
}
}
}
throw std::runtime_error("prepare_quantization: Unsupported precision of quantize output values");
}
+ if (has_negative_scales) {
+ return;
+ }
+
layout dummy_layout(data_types::f32, format::bfyx, tensor(1, 1, 1, 1));
float zero = 0.f;
auto in_scale_prim = std::make_shared<data>(quantize_node.id() + "_in_scale", memory::attach(dummy_layout, &zero, 1));
quantize_node.set_input_shift_val(in_shift_val);
}
+ if (need_clamp) {
+ quantize_node.set_need_clamp();
+ }
+
if (per_tensor_in_range) {
quantize_node.set_per_tensor_input_range();
quantize_node.set_input_lo_val(in_lo_val);
void fuse_sigmoid_mul_to_swish(program_impl &p);
void fuse_reorders(program_impl& p);
void fuse_activations(program_impl& p);
- void fuse_skip_layers(program_impl& p);
void fuse_simple_primitives(program_impl &p);
+ void optimize_fused_ops(program_impl &p);
layout_optimizer& _lo;
};
virtual bool validate(const primitive_inst& instance) const = 0;
std::string get_kernel_name() const { return _kernel_name; }
// TODO: added a derived class for weights reordering (maybe for all static data reordering)
- const kernel_selector::weights_reorder_params _weights_reorder_params;
+ kernel_selector::weights_reorder_params _weights_reorder_params;
// class typed_primitive_gpu_impl override this with return false;
virtual bool is_cpu() const { return true; }
}
const std::vector<fused_primitive_desc>& get_fused_primitives() const { return fused_prims; }
+ std::vector<fused_primitive_desc>& get_fused_primitives() { return fused_prims; }
size_t get_fused_inputs_count() const {
size_t count = 0;
program_node& input(size_t index = 0) const { return get_dependency(index); }
size_t inputs_count() const { return get_dependencies().size(); }
bool get_scale_shift_opt() const { return scale_shift_opt; }
+ bool get_need_pre_shift() { return need_pre_shift; }
void set_scale_shift_opt() { scale_shift_opt = true; }
void set_need_post_scale() { need_post_scale = true; }
void set_need_post_shift() { need_post_shift = true; }
void set_need_pre_shift() { need_pre_shift = true; }
+ void set_need_clamp() { need_clamp = true; }
void set_per_tensor_input_scale() { per_tensor_input_scale = true; }
void set_per_tensor_input_shift() { per_tensor_input_shift = true; }
void set_per_tensor_input_range() { per_tensor_input_range = true; }
need_post_scale,
need_post_shift,
need_pre_shift,
+ need_clamp,
per_tensor_input_range,
per_tensor_input_scale,
per_tensor_input_shift,
bool need_post_scale = false;
bool need_post_shift = false;
bool need_pre_shift = false;
+ bool need_clamp = false;
bool per_tensor_input_range = false;
bool per_tensor_input_scale = false;
return "g_os_is_zyx_isv16_osv16";
case format::g_os_is_yx_osv16_isv4:
return "g_os_is_yx_osv16_isv4";
+ case format::g_os_zyx_is_osv16_isv4:
+ return "g_os_zyx_is_osv16_isv4";
+ case format::g_os_zyx_is_osv16_isv16:
+ return "g_os_zyx_is_osv16_isv16";
+ case format::g_os_zyx_is_osv16_isv32:
+ return "g_os_zyx_is_osv16_isv32";
+ case format::g_os_zyx_is_osv32_isv4:
+ return "g_os_zyx_is_osv32_isv4";
+ case format::g_os_zyx_is_osv32_isv16:
+ return "g_os_zyx_is_osv32_isv16";
+ case format::g_os_zyx_is_osv32_isv32:
+ return "g_os_zyx_is_osv32_isv32";
default:
return "unknown (" + std::to_string(fmt.value) + ")";
}
return kernel_selector::weights_layout::g_os_iyx_osv32;
case format::gs_oiyx_gsv16:
return kernel_selector::weights_layout::gs_oiyx_gsv16;
+ case format::gs_oizyx_gsv16:
+ return kernel_selector::weights_layout::gs_oizyx_gsv16;
case format::gs_oiyx_gsv32:
return kernel_selector::weights_layout::gs_oiyx_gsv32;
case format::gyxio:
return kernel_selector::weights_layout::g_os_is_zyx_isv16_osv16;
case format::g_os_is_yx_osv16_isv4:
return kernel_selector::weights_layout::g_os_is_yx_osv16_isv4;
+ case format::g_os_zyx_is_osv16_isv4:
+ return kernel_selector::weights_layout::g_os_zyx_is_osv16_isv4;
+ case format::g_os_zyx_is_osv16_isv16:
+ return kernel_selector::weights_layout::g_os_zyx_is_osv16_isv16;
+ case format::g_os_zyx_is_osv16_isv32:
+ return kernel_selector::weights_layout::g_os_zyx_is_osv16_isv32;
+ case format::g_os_zyx_is_osv32_isv4:
+ return kernel_selector::weights_layout::g_os_zyx_is_osv32_isv4;
+ case format::g_os_zyx_is_osv32_isv16:
+ return kernel_selector::weights_layout::g_os_zyx_is_osv32_isv16;
+ case format::g_os_zyx_is_osv32_isv32:
+ return kernel_selector::weights_layout::g_os_zyx_is_osv32_isv32;
default:
throw std::invalid_argument("Unable to convert tensor layout " + fmt_to_str(f) + " to weights layout");
}
return cldnn::format::g_os_iyx_osv32;
case kernel_selector::weights_layout::gs_oiyx_gsv16:
return cldnn::format::gs_oiyx_gsv16;
+ case kernel_selector::weights_layout::gs_oizyx_gsv16:
+ return cldnn::format::gs_oizyx_gsv16;
case kernel_selector::weights_layout::gs_oiyx_gsv32:
return cldnn::format::gs_oiyx_gsv32;
case kernel_selector::weights_layout::gyxio:
return cldnn::format::g_os_is_zyx_isv16_osv16;
case kernel_selector::weights_layout::os_is_yx_osv16_isv4:
return cldnn::format::g_os_is_yx_osv16_isv4;
+ case kernel_selector::weights_layout::g_os_zyx_is_osv16_isv4:
+ return cldnn::format::g_os_zyx_is_osv16_isv4;
+ case kernel_selector::weights_layout::g_os_zyx_is_osv16_isv16:
+ return cldnn::format::g_os_zyx_is_osv16_isv16;
+ case kernel_selector::weights_layout::g_os_zyx_is_osv16_isv32:
+ return cldnn::format::g_os_zyx_is_osv16_isv32;
+ case kernel_selector::weights_layout::g_os_zyx_is_osv32_isv4:
+ return cldnn::format::g_os_zyx_is_osv32_isv4;
+ case kernel_selector::weights_layout::g_os_zyx_is_osv32_isv16:
+ return cldnn::format::g_os_zyx_is_osv32_isv16;
+ case kernel_selector::weights_layout::g_os_zyx_is_osv32_isv32:
+ return cldnn::format::g_os_zyx_is_osv32_isv32;
default:
return cldnn::format::bfyx;
}
new_vals[3] = align_to(vals[3], 4);
new_vals[2] = align_to(vals[2], 8);
}
+ if (ks_layout == kernel_selector::Tensor::bs_fs_yx_bsv16_fsv16) {
+ new_vals[0] = align_to(vals[0], 16);
+ new_vals[1] = align_to(vals[1], 16);
+ }
+ if (ks_layout == kernel_selector::Tensor::bs_fs_zyx_bsv16_fsv16) {
+ new_vals[0] = align_to(vals[0], 16);
+ new_vals[1] = align_to(vals[1], 16);
+ }
for (size_t i = 0; i < vec.size(); i++) {
const size_t tensor_index = vec.size() - 1 - i;
pitch *= (reserved_in_mem_count + lp + up);
}
+ if (ks_layout == kernel_selector::Tensor::bs_fs_yx_bsv16_fsv16) {
+ vec[2].pitch = (vec[0].v * vec[1].v) * 16;
+ vec[3].pitch = vec[2].pitch * vec[2].v;
+ }
+ if (ks_layout == kernel_selector::Tensor::bs_fs_zyx_bsv16_fsv16) {
+ vec[3].pitch = (vec[0].v * vec[1].v * vec[2].v) * 16;
+ vec[4].pitch = vec[3].pitch * vec[3].v;
+ }
+
const int feature_index =
kernel_selector::DataTensor::Channelndex(ks_layout, kernel_selector::Tensor::DataChannelName::FEATURE);
vec[feature_index].v /= split;
(input_layout.data_type == data_types::f32 || input_layout.data_type == data_types::f16) &&
((input_layout.size.feature[0] / conv->split()) % 16 == 0 || input_layout.size.feature[0] == 3) &&
weights_layout.data_type == input_layout.data_type &&
- (weights_layout.size.batch[0] % 16 == 0 || weights_layout.size.batch[0] % 8 == 0) &&
+ (weights_layout.size.batch[0] % 16 == 0 || (weights_layout.size.batch[0] == 8 && conv->groups > 1)) &&
conv->dilation == tensor(1))
return true;
return false;
(input_layout.data_type == data_types::f32 || input_layout.data_type == data_types::f16) &&
deconv->split() == 1)
return true;
+
+ if (input_layout.format.dimension() == 5 &&
+ (input_layout.data_type == data_types::i8 || input_layout.data_type == data_types::u8) &&
+ deconv->split() == 1)
+ return true;
+
return false;
}
deconv->split() == 1 &&
(deconv->groups == 1 || (static_cast<int>(deconv->groups) == weights_layout.size.group[0])))
return true;
+
+ if (input_layout.format.dimension() == 4 &&
+ (input_layout.data_type == data_types::i8 || input_layout.data_type == data_types::u8) &&
+ deconv->split() == 1)
+ return true;
+
return false;
}
#include "gpu/ocl_toolkit.h"
+#include "kernel_base.h"
+
#include <algorithm>
#include <fstream>
#include <iostream>
: engine(&engine_ref),
options(options),
processing_order() {
+ kernel_selector::KernelBase::ResetCounter();
set_options();
pm = std::unique_ptr<pass_manager>(new pass_manager(*this));
prepare_nodes(topology);
+++ /dev/null
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include <cmath>
-#include <gtest/gtest.h>
-#include <algorithm>
-#include "api/memory.hpp"
-#include <api/input_layout.hpp>
-#include "api/activation_grad.hpp"
-#include <api/topology.hpp>
-#include <api/network.hpp>
-#include <api/engine.hpp>
-#include <api/data.hpp>
-#include "test_utils/test_utils.h"
-#include "test_utils/float16.h"
-
-using namespace cldnn;
-using namespace tests;
-
-TEST(activation_grad_f16_fw_gpu, basic_bfyx_all_functions)
-{
- // Input:
- // 1 -2 -3 4 5
- // 2 2 3 4 -6
- // 3 -3 3 5 1
- // 1 1 1 -1 1
- //
- // a: 0.5, b: 2.5
- //
-
- const auto& engine = get_test_engine();
-
- auto input_grad = memory::allocate(engine, { data_types::f16, format::bfyx,{ 1, 1, 5, 4 } });
- auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 1, 1, 5, 4 } });
- auto input_params = memory::allocate(engine, { data_types::f16, format::bfyx,{ 1, 1, 2, 1 } });
- set_values(input_grad,
- { FLOAT16(1.0f), FLOAT16(-2.0f),FLOAT16(-3.0f), FLOAT16(4.0f), FLOAT16(5.0f),
- FLOAT16(2.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(-6.0f),
- FLOAT16(3.0f), FLOAT16(-3.0f),FLOAT16(3.0f), FLOAT16(5.0f), FLOAT16(1.0f),
- FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(-1.0f), FLOAT16(1.0f) });
-
- set_values(input,
- { FLOAT16(12.0f), FLOAT16(-22.0f), FLOAT16(-32.0f), FLOAT16(42.0f), FLOAT16(52.0f),
- FLOAT16(22.0f),FLOAT16(22.0f), FLOAT16(32.0f), FLOAT16(42.0f), FLOAT16(-62.0f),
- FLOAT16(32.0f), FLOAT16(-32.0f), FLOAT16(32.0f), FLOAT16(52.0f), FLOAT16(12.0f),
- FLOAT16(12.0f), FLOAT16(12.0f), FLOAT16(12.0f), FLOAT16(-12.0f), FLOAT16(12.0f) });
-
- std::vector<activation_grad_func> funcs = {
- activation_grad_func::none,
- activation_grad_func::relu,
- activation_grad_func::relu_negative_slope,
- };
-
- activation_additional_params params = { 0.5f, 2.5f };
- set_values(input_params, { FLOAT16(params.a), FLOAT16(params.b) });
-
- for (uint8_t i = 0; i < 2; i++)
- {
- for (auto func : funcs)
- {
- topology topology(input_layout("input_grad", input_grad.get_layout()));
- topology.add(data("input", input));
-
- if (i == 0)
- {
- topology.add(activation_grad("activation_grad", "input_grad", "input", func, params));
- }
- else
- {
- topology.add(data("input_params", input_params));
- topology.add(activation_grad("activation_grad", "input_grad", "input", "input_params", func));
- }
-
- network network(engine, topology);
- network.set_input_data("input_grad", input_grad);
- auto outputs = network.execute();
- EXPECT_EQ(outputs.size(), size_t(1));
- EXPECT_EQ(outputs.begin()->first, "activation_grad");
-
- auto output_memory = outputs.at("activation_grad").get_memory();
- auto output_layout = output_memory.get_layout();
- auto output_ptr = output_memory.pointer<uint16_t>();
- auto input_grad_ptr = input_grad.pointer<uint16_t>();
- auto input_ptr = input.pointer<uint16_t>();
-
- int y_size = output_layout.size.spatial[1];
- int x_size = output_layout.size.spatial[0];
- int f_size = output_layout.size.feature[0];
- int b_size = output_layout.size.batch[0];
- EXPECT_EQ(output_layout.format, format::bfyx);
- EXPECT_EQ(y_size, 4);
- EXPECT_EQ(x_size, 5);
- EXPECT_EQ(f_size, 1);
- EXPECT_EQ(b_size, 1);
-
- std::vector<float> out;
-
- for (size_t i = 0; i < output_layout.get_linear_size(); ++i)
- {
- switch (func)
- {
- case activation_grad_func::none:
- EXPECT_FLOAT_EQ(float16_to_float32(input_grad_ptr[i]), float16_to_float32(output_ptr[i]));
- break;
- case activation_grad_func::relu:
- EXPECT_FLOAT_EQ(float16_to_float32(input_grad_ptr[i]) * (float16_to_float32(input_ptr[i]) > 0), float16_to_float32(output_ptr[i]));
- break;
- case activation_grad_func::relu_negative_slope:
- EXPECT_FLOAT_EQ(float16_to_float32(input_grad_ptr[i]) * ((float16_to_float32(input_ptr[i]) > 0) + params.a * (float16_to_float32(input_ptr[i]) <= 0)), float16_to_float32(output_ptr[i]));
- break;
- default:
- break;
- }
- }
- }
- }
-}
-
-TEST(activation_grad_f32_fw_gpu, basic_bfyx_all_functions)
-{
- // Input:
- // 1 -2 -3 4 5
- // 2 2 3 4 -6
- // 3 -3 3 5 1
- // 1 1 1 -1 1
- //
- // a: 0.5, b: 2.5
- //
-
- const auto& engine = get_test_engine();
-
- auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
- auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
- auto input_params = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
- set_values(input_grad,
- { 1.0f, -2.0f, -3.0f, 4.0f, 5.0f,
- 2.0f, 2.0f, 3.0f, 4.0f, -6.0f,
- 3.0f, -3.0f, 3.0f, 5.0f, 1.0f,
- 1.0f, 1.0f, 1.0f, -1.0f, 1.0f });
-
- set_values(input,
- { 12.0f, -22.0f, -32.0f, 42.0f, 52.0f,
- 22.0f, 22.0f, 32.0f, 42.0f, -62.0f,
- 32.0f, -32.0f, 32.0f, 52.0f, 12.0f,
- 12.0f, 12.0f, 12.0f, -12.0f, 12.0f });
-
- std::vector<activation_grad_func> funcs = {
- activation_grad_func::none,
- activation_grad_func::relu,
- activation_grad_func::relu_negative_slope,
- };
-
- activation_additional_params params = { 0.5f, 2.5f };
- set_values(input_params, { params.a, params.b });
-
- for (uint8_t i = 0; i < 2; i++)
- {
- for (auto func : funcs)
- {
- topology topology(input_layout("input_grad", input_grad.get_layout()));
- topology.add(data("input", input));
-
- if (i == 0)
- {
- topology.add(activation_grad("activation_grad", "input_grad", "input", func, params));
- }
- else
- {
- topology.add(data("input_params", input_params));
- topology.add(activation_grad("activation_grad", "input_grad", "input", "input_params", func));
- }
-
- network network(engine, topology);
- network.set_input_data("input_grad", input_grad);
- auto outputs = network.execute();
- EXPECT_EQ(outputs.size(), size_t(1));
- EXPECT_EQ(outputs.begin()->first, "activation_grad");
-
- auto output_memory = outputs.at("activation_grad").get_memory();
- auto output_layout = output_memory.get_layout();
- auto output_ptr = output_memory.pointer<float>();
- auto input_grad_ptr = input_grad.pointer<float>();
- auto input_ptr = input.pointer<float>();
-
- int y_size = output_layout.size.spatial[1];
- int x_size = output_layout.size.spatial[0];
- int f_size = output_layout.size.feature[0];
- int b_size = output_layout.size.batch[0];
- EXPECT_EQ(output_layout.format, format::bfyx);
- EXPECT_EQ(y_size, 4);
- EXPECT_EQ(x_size, 5);
- EXPECT_EQ(f_size, 1);
- EXPECT_EQ(b_size, 1);
-
- std::vector<float> out;
-
- for (size_t i = 0; i < output_layout.get_linear_size(); ++i)
- {
- switch (func)
- {
- case activation_grad_func::none:
- EXPECT_FLOAT_EQ(input_grad_ptr[i], output_ptr[i]);
- break;
- case activation_grad_func::relu:
- EXPECT_FLOAT_EQ(input_grad_ptr[i] * (input_ptr[i] > 0), output_ptr[i]);
- break;
- case activation_grad_func::relu_negative_slope:
- EXPECT_FLOAT_EQ(input_grad_ptr[i] * ((input_ptr[i] > 0) + params.a * (input_ptr[i] <= 0)), output_ptr[i]);
- break;
- default:
- break;
- }
- }
- }
- }
-}
}
}
+// Checks feature-axis concatenation of two max-pooled i8 inputs with
+// optimize_data enabled (the concat may be optimized out by fusing into
+// the pooling outputs); result is reordered to yxfb and compared element-wise.
+TEST(concat_gpu, i8_optimization_with_pool) {
+    const auto& engine = get_test_engine();
+
+    // Two 1x1x8x3 (b, f, x, y) i8 inputs.
+    auto input0 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 1, 8, 3}});
+    auto input1 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 1, 8, 3}});
+
+
+    set_values<int8_t>(input0, { 11, 12, 13,
+                                 14, 12, 12,
+                                 13, -14, 13,
+                                 13, -13, 15,
+                                 16, -16, -13,
+                                 -14, 12, 11,
+                                 16, -14, -13,
+                                 18, -13, -15, });
+    set_values<int8_t>(input1, { 11, 12, 13,
+                                 15, 12, 12,
+                                 13, 14, 12,
+                                 13, 13, 15,
+                                 12, 14, 13,
+                                 14, 17, 18,
+                                 13, 14, 11,
+                                 13, 13, 15 });
+
+
+    // Expected values after 2x2 max pooling (stride 1) on each input,
+    // feature-concat of the two results, and reorder to yxfb.
+    VF<int8_t> output_vec = {13, 13, 13, 13, 15, 15,
+                             16, 15, 16, 14, 13, 14,
+                             13, 14, 13, 18, 16, 18,
+                             16, 15, 16, 15, 18, 14,
+                             18, 14, -13, 15};
+
+    layout reorder_layout(data_types::i8, format::yxfb, {7, 2, 2, 1});
+    topology topology(input_layout("input0", input0.get_layout()),
+                      input_layout("input1", input1.get_layout()),
+                      pooling("pool0", "input0", pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1}),
+                      pooling("pool1", "input1", pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1}),
+                      concatenation("concat",
+                                    {"pool0", "pool1"},
+                                    concatenation::concatenation_axis::along_f,
+                                    data_types::i8,
+                                    padding{{0, 0, 0, 0}, 0}),
+                      reorder("reorder", "concat", reorder_layout));
+    cldnn::build_options options;
+    options.set_option(cldnn::build_option::optimize_data(true));
+    network network(engine, topology, options);
+    network.set_input_data("input0", input0);
+    network.set_input_data("input1", input1);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "reorder");
+
+    auto output_memory = outputs.at("reorder").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<int8_t>();
+
+    // NOTE(review): y_size reads spatial[0] and x_size reads spatial[1] here,
+    // the opposite of the bfyx tests in this file — confirm this mapping is
+    // intentional for the yxfb output layout.
+    int y_size = output_layout.size.spatial[0];
+    int x_size = output_layout.size.spatial[1];
+    int f_size = output_layout.size.feature[0];
+    int b_size = output_layout.size.batch[0];
+    EXPECT_EQ(output_layout.format, format::yxfb);
+    EXPECT_EQ(y_size, 7);
+    EXPECT_EQ(x_size, 2);
+    EXPECT_EQ(f_size, 2);
+    EXPECT_EQ(b_size, 1);
+
+    // Flat element-wise comparison against the reference vector.
+    for (size_t x = 0; x < output_layout.count(); ++x) {
+        EXPECT_EQ(output_vec[x], output_ptr[x]);
+    }
+}
+
+// Checks feature-axis concatenation of three i8 inputs feeding a convolution
+// with optimize_data enabled; the fixed input/filter values below make the
+// expected output exactly computable by hand.
+TEST(concat_gpu, i8_optimization_with_conv) {
+    // Filter : 3x2x3
+    // Stride : 2x1
+    // Input1 : 4x5
+    // Input2 : 4x5
+    // Input3 : 4x5
+    // Concat output : 3x4x5
+    // Conv input : 3x4x5
+    // Output : 2x3
+    //
+    // Input0:
+    // 1 2 3 -4 5
+    // 2 2 3 4 -6
+    // -3 3 3 5 1
+    // -1 1 1 1 -1
+    // Input1:
+    // 5 5 3 -4 5
+    // 2 -2 5 4 6
+    // 6 1 3 5 1
+    // 1 2 -3 -4 5
+    // Input2:
+    // -2 1 3 2 -5
+    // 1 2 -2 4 2
+    // 3 5 3 -3 1
+    // 5 4 3 2 1
+    //
+    // Filter:
+    // 1 2 1 1 2 1 1 2 1
+    // 2 1 2 2 1 2 2 1 2
+    //
+    // Output:
+    // 53 54 30
+    // 52 47 37
+    const auto& engine = get_test_engine();
+
+    auto input0 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 1, 5, 4}});
+    auto input1 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 1, 5, 4}});
+    auto input2 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 1, 5, 4}});
+    // One output feature over 3 input features, 3x2 spatial filter.
+    auto weights = memory::allocate(engine, { data_types::i8, format::bfyx, { 1, 3, 3, 2 } });
+
+    set_values<int8_t>(weights, { 1, 2, 1,
+                                  2, 1, 2, 1, 2, 1,
+                                  2, 1, 2, 1, 2, 1,
+                                  2, 1, 2 });
+
+    set_values<int8_t>(input0, { 1, 2, 3, -4, 5,
+                                 2, 2, 3, 4, -6,
+                                 -3, 3, 3, 5, 1,
+                                 -1, 1, 1, 1, -1 });
+    set_values<int8_t>(input1, { 5, 5, 3, -4, 5,
+                                 2, -2, 5, 4, 6,
+                                 6, 1, 3, 5, 1,
+                                 1, 2, -3, -4, 5 });
+    set_values<int8_t>(input2, { -2, 1, 3, 2, -5,
+                                 1, 2, -2, 4, 2,
+                                 3, 5, 3, -3, 1,
+                                 5, 4, 3, 2, 1 });
+
+    VF<int8_t> output_vec = { 53, 54, 30, 52, 47, 37 };
+
+
+    layout reorder_layout(data_types::i8, format::bfyx, {1, 1, 2, 3});
+    topology topology(input_layout("input0", input0.get_layout()),
+                      input_layout("input1", input1.get_layout()),
+                      input_layout("input2", input2.get_layout()),
+                      concatenation("concat",
+                                    {"input0", "input1", "input2"},
+                                    concatenation::concatenation_axis::along_f,
+                                    data_types::i8,
+                                    padding{{0, 0, 0, 0}, 0}),
+                      data("weights", weights),
+                      // Stride {b, f, x, y} = {1, 1, 1, 2} matches the
+                      // "Stride : 2x1" description above.
+                      convolution("conv", "concat", { "weights" }, { 1,1,1,2 }),
+                      reorder("output", "conv", reorder_layout));
+    cldnn::build_options options;
+    options.set_option(cldnn::build_option::optimize_data(true));
+    network network(engine, topology, options);
+    network.set_input_data("input0", input0);
+    network.set_input_data("input1", input1);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "output");
+
+    auto output_memory = outputs.at("output").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<int8_t>();
+
+    int y_size = output_layout.size.spatial[1];
+    int x_size = output_layout.size.spatial[0];
+    int f_size = output_layout.size.feature[0];
+    int b_size = output_layout.size.batch[0];
+    EXPECT_EQ(output_layout.format, format::bfyx);
+    EXPECT_EQ(y_size, 2);
+    EXPECT_EQ(x_size, 3);
+    EXPECT_EQ(f_size, 1);
+    EXPECT_EQ(b_size, 1);
+
+    for (size_t x = 0; x < output_layout.count(); ++x) {
+        EXPECT_EQ(output_vec[x], output_ptr[x]);
+    }
+}
+
+// Combined case: two i8 inputs are max-pooled, feature-concatenated, then
+// convolved (with a negative x input offset), all with optimize_data on.
+TEST(concat_gpu, i8_optimization_with_pool_conv) {
+    // Filter : 32x2x1
+    // Input offset : 0x0x-1x0
+    // Stride : 1x1
+    // Input0 : 16x3x2
+    // Input1 : 16x3x2
+    // Output : 1x1x3
+    //
+    // Input0:
+    // -3 6 0 2 -1 -1 6 0 5 4 1 6 2 4 0 5
+    // -2 -1 1 0 2 3 3 3 6 2 4 7 3 6 7 -1
+    // 7 7 5 -3 1 -1 5 4 0 3 -2 6 2 5 2 4
+    // 5 -1 3 6 2 0 -3 -1 0 3 0 -1 1 6 1 6
+    // 5 -2 2 -1 5 6 3 4 1 0 6 6 7 2 6 3
+    // 6 7 -1 5 5 6 -1 0 -1 5 5 2 3 -1 -3 4
+    //
+    // Input1:
+    // 4 -2 0 0 6 2 0 4 6 4 4 4 -3 -1 4 -3
+    // 1 0 -1 5 -1 1 4 2 7 7 0 2 3 4 -1 3
+    // 7 7 2 -3 -1 5 -2 2 6 -3 0 7 0 3 3 3
+    // -1 0 -2 -2 7 -3 -3 -1 5 0 3 4 0 -1 2 5
+    // 2 -1 2 -3 0 -3 -3 2 4 3 3 5 5 7 5 1
+    // 2 2 -3 6 6 7 1 -1 -2 5 1 -1 4 5 -3 -2
+    //
+    // Filters:
+    // -1, 2, -2, 2, -2, 1, 1, 0, -1, 1, 2, -2, 2, 1, -2, 0,
+    // 0, -2, -2, -2, -2, -1, 2, 1, 2, -1, -1, 0, 2, -2, -2, 1,
+    // 0, -2, 0, 1, -2, -1, -2, 0, -1, -1, -2, 1, -2, 0, 1, 2,
+    // 2, 2, 2, -2, 0, 2, 1, -2, -1, -1, 0, -2, 2, -1, 2, -1
+    //
+    // Output:
+    // -14, -35, -10
+
+    const auto& engine = get_test_engine();
+
+    auto input0 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 16, 3, 2}});
+    auto input1 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 16, 3, 2}});
+    // One output feature over the 32 concatenated input features, 2x1 filter.
+    auto weights = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 32, 2, 1}});
+
+    set_values<int8_t>(weights, {-1, 2, -2, 2, -2, 1, 1, 0, -1, 1, 2, -2, 2, 1, -2, 0, 0, -2, -2, -2, -2, -1, 2, 1, 2, -1, -1, 0, 2, -2, -2, 1,
+                                 0, -2, 0, 1, -2, -1, -2, 0, -1, -1, -2, 1, -2, 0, 1, 2, 2, 2, 2, -2, 0, 2, 1, -2, -1, -1, 0, -2, 2, -1, 2, -1});
+
+    set_values<int8_t>(input0, {-3, 6, 0, 2, -1, -1, 6, 0, 5, 4, 1, 6, 2, 4, 0, 5,
+                                -2, -1, 1, 0, 2, 3, 3, 3, 6, 2, 4, 7, 3, 6, 7, -1,
+                                7, 7, 5, -3, 1, -1, 5, 4, 0, 3, -2, 6, 2, 5, 2, 4,
+                                5, -1, 3, 6, 2, 0, -3, -1, 0, 3, 0, -1, 1, 6, 1, 6,
+                                5, -2, 2, -1, 5, 6, 3, 4, 1, 0, 6, 6, 7, 2, 6, 3,
+                                6, 7, -1, 5, 5, 6, -1, 0, -1, 5, 5, 2, 3, -1, -3, 4 });
+
+    set_values<int8_t>(input1, { 4, -2, 0, 0, 6, 2, 0, 4, 6, 4, 4, 4, -3, -1, 4, -3,
+                                 1, 0, -1, 5, -1, 1, 4, 2, 7, 7, 0, 2, 3, 4, -1, 3,
+                                 7, 7, 2, -3, -1, 5, -2, 2, 6, -3, 0, 7, 0, 3, 3, 3,
+                                 -1, 0, -2, -2, 7, -3, -3, -1, 5, 0, 3, 4, 0, -1, 2, 5,
+                                 2, -1, 2, -3, 0, -3, -3, 2, 4, 3, 3, 5, 5, 7, 5, 1,
+                                 2, 2, -3, 6, 6, 7, 1, -1, -2, 5, 1, -1, 4, 5, -3, -2});
+
+    VF<int8_t> output_vec = { -14, -35, -10 };
+
+    layout reorder_layout(data_types::i8, format::bfyx, {1, 1, 3, 1});
+    topology topology(input_layout("input0", input0.get_layout()),
+                      input_layout("input1", input1.get_layout()),
+                      pooling("pool0", "input0", pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1}),
+                      pooling("pool1", "input1", pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1}),
+                      concatenation("concat",
+                                    {"pool0", "pool1"},
+                                    concatenation::concatenation_axis::along_f,
+                                    data_types::i8,
+                                    padding{{0, 0, 0, 0}, 0}),
+                      data("weights", weights),
+                      // Input offset {0, 0, -1, 0} = the "0x0x-1x0" above.
+                      convolution("conv", "concat", {"weights"}, {1, 1, 1, 1}, {0, 0, -1, 0}),
+                      reorder("output", "conv", reorder_layout) );
+    cldnn::build_options options;
+    options.set_option(cldnn::build_option::optimize_data(true));
+    network network(engine, topology, options);
+    network.set_input_data("input0", input0);
+    network.set_input_data("input1", input1);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "output");
+
+    auto output_memory = outputs.at("output").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<int8_t>();
+
+    // NOTE(review): y_size/x_size read spatial[0]/spatial[1], opposite of the
+    // conv test above — with a 3x1 output both orders pass; confirm intent.
+    int y_size = output_layout.size.spatial[0];
+    int x_size = output_layout.size.spatial[1];
+    int f_size = output_layout.size.feature[0];
+    int b_size = output_layout.size.batch[0];
+    EXPECT_EQ(output_layout.format, format::bfyx);
+    EXPECT_EQ(y_size, 3);
+    EXPECT_EQ(x_size, 1);
+    EXPECT_EQ(f_size, 1);
+    EXPECT_EQ(b_size, 1);
+
+    for (size_t x = 0; x < output_layout.count(); ++x) {
+        EXPECT_EQ(output_vec[x], output_ptr[x]);
+    }
+}
+
using TestParamType_concat = ::testing::tuple<size_t, // 0 - Input Batch size
std::vector<size_t>, // 1 - Inputs Features Sizes
size_t, // 2 - Input Y Size
#include "api/memory.hpp"
#include <api/input_layout.hpp>
#include "api/crop.hpp"
+#include <api/eltwise.hpp>
#include <api/topology.hpp>
#include <api/network.hpp>
#include <api/engine.hpp>
}
}
}
+
+// batch size, input feature, crop out feature, (in_out format, crop format)
+using crop_test_params = std::tuple<size_t, size_t, size_t, std::pair<cldnn::format,cldnn::format>>;
+
+class crop_gpu : public ::testing::TestWithParam<crop_test_params> {};
+
+// Reorders a (batch x feature x 1 x 1 [x 1]) input into a blocked format,
+// crops the trailing `crop out feature` features there, reorders back to the
+// plain format, and compares with a host-computed feature slice. Exercises
+// crop on padded/blocked layouts.
+TEST_P(crop_gpu, pad_test) {
+    auto p = GetParam();
+
+    const auto& engine = get_test_engine();
+
+    auto batch_num = std::get<0>(p);
+    auto feature_num = std::get<1>(p);
+    auto x_size = 1;
+    auto y_size = 1;
+    auto z_size = 1;
+
+    auto crop_batch_num = batch_num;
+    auto crop_feature_num_1 = std::get<2>(p);
+    auto crop_x_size = 1;
+    auto crop_y_size = 1;
+    auto crop_z_size = 1;
+    // The crop keeps the LAST crop_feature_num_1 features.
+    auto feature_offset_1 = feature_num - crop_feature_num_1;
+
+    auto in_out_format = std::get<3>(p).first;
+    auto crop_format = std::get<3>(p).second;
+
+    auto input = memory::allocate(engine, { data_types::f32, in_out_format, { tensor(spatial(x_size, y_size, z_size), feature(feature_num), batch(batch_num)) } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(reorder("reorder", "input", crop_format, data_types::f32));
+    topology.add(crop("crop1", "reorder", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size, crop_z_size), feature(crop_feature_num_1)), { tensor(feature(feature_offset_1), spatial(0,0,0), batch(0)) }));
+    topology.add(reorder("out", "crop1", in_out_format, data_types::f32));
+
+    // Host reference: feature i holds value i; the expected output is the
+    // tail slice [feature_offset_1, feature_num) repeated for every batch.
+    std::vector<float> input_vec;
+    std::vector<float> res;
+    std::vector<float> input_data;
+    std::vector<float> res_data;
+    for (size_t i = 0; i < feature_num; i++) {
+        input_data.push_back(static_cast<float>(i));
+    }
+    for (size_t i = 0; i < crop_feature_num_1; i++) {
+        res_data.push_back(input_data[feature_offset_1 + i]);
+    }
+    for (size_t i = 0; i < batch_num; i++) {
+        input_vec.insert(input_vec.end(), input_data.begin(), input_data.end());
+        res.insert(res.end(), res_data.begin(), res_data.end());
+    }
+    set_values(input, input_vec);
+    build_options bo;
+    bo.set_option(build_option::optimize_data(true));
+
+    network network(engine, topology, bo);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("out").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (size_t i = 0; i < res.size(); i++)
+        EXPECT_EQ(output_ptr[i], res[i]);
+}
+
+// (in_out format, blocked crop format) pairs exercised by the pad test:
+// plain 4-D/5-D formats cropped through fsv16 and bsv16_fsv16 blocked layouts.
+static std::vector<std::pair<cldnn::format,cldnn::format>> formats = {
+    std::make_pair<cldnn::format, cldnn::format>(format::bfyx, format::b_fs_yx_fsv16),
+    std::make_pair<cldnn::format, cldnn::format>(format::bfzyx, format::b_fs_zyx_fsv16),
+    std::make_pair<cldnn::format, cldnn::format>(format::bfyx, format::bs_fs_yx_bsv16_fsv16),
+    std::make_pair<cldnn::format, cldnn::format>(format::bfzyx, format::bs_fs_zyx_bsv16_fsv16),
+    };
+// Sizes include both block-aligned (8, 16, 32) and unaligned (17, 18, 24)
+// values so padded tails of the blocked layouts are covered.
+static std::vector<size_t> batches = {1, 8, 16, 17};
+static std::vector<size_t> in_features = {18, 24, 32};
+static std::vector<size_t> crop_features = {4, 8, 12, 17};
+
+INSTANTIATE_TEST_CASE_P(crop_test, crop_gpu,
+                        ::testing::Combine(
+                            ::testing::ValuesIn(batches),
+                            ::testing::ValuesIn(in_features),
+                            ::testing::ValuesIn(crop_features),
+                            ::testing::ValuesIn(formats)
+                        ), );
#include "test_utils/test_utils.h"
#include "test_utils/float16.h"
#include "api/reorder.hpp"
+#include "src/include/to_string_utils.h"
+
+// Let the test-utils half type FLOAT16 participate in
+// cldnn::type_to_data_type<> lookups (maps it to data_types::f16), so the
+// templated test harness below can build layouts from the element type.
+namespace cldnn {
+template<> struct type_to_data_type<FLOAT16> { static const data_types value = data_types::f16; };
+}
using namespace cldnn;
using namespace tests;
+// Maps an input element type to the accumulator type used by the reference
+// deconvolution: i8/u8 accumulate in int, fp16 in float, anything else in
+// its own type.
+template <typename InputT>
+struct deconvolution_traits {
+    using accumulator_type = InputT;
+};
+
+template <>
+struct deconvolution_traits<uint8_t> {
+    using accumulator_type = int;
+};
+
+template <>
+struct deconvolution_traits<int8_t> {
+    using accumulator_type = int;
+};
+
+template <>
+struct deconvolution_traits<FLOAT16> {
+    using accumulator_type = float;
+};
+
+// Kahan (compensated) summation: accumulates `input` while carrying a
+// running compensation term `c` for lost low-order bits, reducing
+// floating-point rounding error versus a naive sum.
+template<typename T>
+T kahan_summation(std::vector<T> &input) {
+    T sum = 0;
+    T c = 0;    // running compensation for lost low-order bits
+    for (T x : input) {
+        T y = x - c;
+        T t = sum + y;
+        c = (t - sum) - y;
+        sum = t;
+    }
+    return sum;
+}
+
+// Host reference for one output feature of a (possibly grouped)
+// deconvolution / transposed convolution.
+//   input          - one batch of input, features outermost (f, z, y, x)
+//   weights        - this output feature's filter, (i, z, y, x) order
+//   bias           - scalar added to every output element
+//   stride, offset - spatial stride and input offset of the primitive
+//   input_f_start  - first input feature to read (group * ifm for groups)
+// Output spatial size: out = 2 * offset + (in - 1) * stride + filter.
+// Products are accumulated in AccumulatorT via Kahan summation.
+template <typename InputT, typename WeightsT, typename OutputT, typename AccumulatorT = typename deconvolution_traits<InputT>::accumulator_type>
+VVVF<OutputT> reference_deconvolution(
+    const VVVVF<InputT>& input, // fyx dimensions order
+    const VVVVF<WeightsT>& weights,
+    float bias,
+    tensor stride,
+    tensor offset,
+    size_t input_f_start
+) {
+    auto ifm = weights.size();
+    auto filter_z = static_cast<int>(weights[0].size());
+    auto filter_y = static_cast<int>(weights[0][0].size());
+    auto filter_x = static_cast<int>(weights[0][0][0].size());
+
+    auto in_z = static_cast<int>(input[0].size());
+    auto in_y = static_cast<int>(input[0][0].size());
+    auto in_x = static_cast<int>(input[0][0][0].size());
+
+    auto stride_x = stride.spatial[0];
+    auto stride_y = stride.spatial[1];
+    auto stride_z = stride.spatial[2];
+
+    auto offset_x = offset.spatial[0];
+    auto offset_y = offset.spatial[1];
+    auto offset_z = offset.spatial[2];
+
+    int out_x = 2 * offset_x + (in_x - 1) * stride_x + filter_x;
+    int out_y = 2 * offset_y + (in_y - 1) * stride_y + filter_y;
+    int out_z = 2 * offset_z + (in_z - 1) * stride_z + filter_z;
+    VVVF<OutputT> output(static_cast<size_t>(out_z), VVF<OutputT>(static_cast<size_t>(out_y), VF<OutputT>(static_cast<size_t>(out_x))));
+
+    for (int oz = 0; oz < out_z; ++oz) {
+        for (int oy = 0; oy < out_y; ++oy) {
+            for (int ox = 0; ox < out_x; ++ox) {
+                VF<AccumulatorT> values;
+                for (int fz = 0; fz < filter_z; ++fz) {
+                    // Skip filter taps that no input sample maps onto
+                    // (outside the input or between stride steps).
+                    int iz = oz - filter_z + 1 - offset_z + fz;
+                    if (iz < 0 || iz >= in_z * stride_z || iz % stride_z != 0)
+                        continue;
+                    iz = iz / stride_z;
+
+                    for (int fy = 0; fy < filter_y; ++fy) {
+                        int iy = oy - filter_y + 1 - offset_y + fy;
+                        if (iy < 0 || iy >= in_y * stride_y || iy % stride_y != 0)
+                            continue;
+                        iy = iy / stride_y;
+
+                        for (int fx = 0; fx < filter_x; ++fx) {
+                            int ix = ox - filter_x + 1 - offset_x + fx;
+                            if (ix < 0 || ix >= in_x * stride_x || ix % stride_x != 0)
+                                continue;
+                            ix = ix / stride_x;
+
+                            for (size_t ifi = 0; ifi < ifm; ++ifi) {
+                                auto in_val = input[input_f_start + ifi][iz][iy][ix];
+                                // Filter is mirrored in all spatial dims:
+                                // deconvolution == convolution with a
+                                // flipped kernel.
+                                auto wei_val = weights[ifi][filter_z - fz - 1][filter_y - fy - 1][filter_x - fx - 1];
+                                values.push_back(static_cast<AccumulatorT>(in_val) * static_cast<AccumulatorT>(wei_val));
+                            }
+                        }
+                    }
+                }
+                output[oz][oy][ox] = static_cast<OutputT>(kahan_summation<AccumulatorT>(values)) + static_cast<OutputT>(bias);
+            }
+        }
+    }
+    return output;
+}
+
TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad) {
// Filter : 2x2
// Input : 2x2
EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]) << " index=" << i;
}
}
+
+// Full description of one randomized deconvolution test case.
+// print_params turns it into a readable parameterized-test name (negative
+// values are encoded with an "m" prefix).
+struct deconvolution_random_test_params {
+    data_types input_type;
+    format::type input_format;
+    tensor input_size;
+    data_types weights_type;
+    format::type weights_format;
+    tensor weights_size;
+    tensor strides;
+    tensor input_offset;
+    bool with_bias;
+    data_types output_type;
+    cldnn::implementation_desc deconv_desc;  // optional forced kernel/output format
+
+    // Builds the test-name suffix from all fields.
+    static std::string print_params(const testing::TestParamInfo<deconvolution_random_test_params>& param_info) {
+        auto& param = param_info.param;
+        // "m<abs>" spelling for negative values.
+        auto to_string_neg = [](int v) {
+            if (v >= 0) {
+                return std::to_string(v);
+            } else {
+                return "m" + std::to_string(-v);
+            }
+        };
+
+        // b x f x sx x sy x sz
+        auto print_tensor = [&](const tensor& size) {
+            return to_string_neg(size.batch[0]) + "x" +
+                to_string_neg(size.feature[0]) + "x" +
+                to_string_neg(size.spatial[0]) + "x" +
+                to_string_neg(size.spatial[1]) + "x" +
+                to_string_neg(size.spatial[2]);
+        };
+
+        // construct a readable name
+        return "in_" + dt_to_str(param.input_type) +
+            "_" + fmt_to_str(param.input_format) +
+            "_" + print_tensor(param.input_size) +
+            "_wei_" + dt_to_str(param.weights_type) +
+            "_" + fmt_to_str(param.weights_format) +
+            "_" + print_tensor(param.weights_size) +
+            (param.with_bias ? "_bias" : "") +
+            "_s_" + print_tensor(param.strides) +
+            "_off_" + print_tensor(param.input_offset) +
+            "_out_" + dt_to_str(param.output_type) +
+            (!param.deconv_desc.kernel_name.empty() ? "_kernel_" + param.deconv_desc.kernel_name : "") +
+            (param.deconv_desc.output_format != format::any ? "_fmt_" + fmt_to_str(param.deconv_desc.output_format) : "");
+    }
+};
+
+// Per-type comparison for the TYPED_*_EQ macros below: exact equality for
+// integral types, ULP comparison for float, 5% relative tolerance for fp16.
+template <typename T>
+struct typed_comparator {
+    static ::testing::AssertionResult compare(const char* lhs_expr, const char* rhs_expr, T ref, T val) {
+        return ::testing::internal::EqHelper<false>::Compare(lhs_expr, rhs_expr, ref, val);
+    }
+};
+
+template <>
+struct typed_comparator<float> {
+    static ::testing::AssertionResult compare(const char* lhs_expr, const char* rhs_expr, float ref, float val) {
+        return ::testing::internal::CmpHelperFloatingPointEQ<float>(lhs_expr, rhs_expr, ref, val);
+    }
+};
+
+template <>
+struct typed_comparator<FLOAT16> {
+    static ::testing::AssertionResult compare(const char* lhs_expr, const char* rhs_expr, FLOAT16 ref, FLOAT16 val) {
+        // Allow 5% relative error for half precision.
+        double abs_error = std::abs(0.05 * (double)ref);
+        return ::testing::internal::DoubleNearPredFormat(lhs_expr, rhs_expr, "5 percent", (double)ref, (double)val, abs_error);
+    }
+};
+
+// Random-value range per element type used by the generators below.
+template <typename T>
+struct type_test_ranges {
+    static constexpr int min = -1;
+    static constexpr int max = 1;
+};
+
+template <>
+struct type_test_ranges<uint8_t> {
+    static constexpr int min = 0;
+    static constexpr int max = 255;
+};
+
+template <>
+struct type_test_ranges<int8_t> {
+    static constexpr int min = -127;
+    static constexpr int max = 127;
+};
+
+// Assert/expect equality dispatched through typed_comparator<T>.
+#define TYPED_ASSERT_EQ(ref, val) \
+    ASSERT_PRED_FORMAT2(typed_comparator<decltype(ref)>::compare, ref, val)
+
+#define TYPED_EXPECT_EQ(ref, val) \
+    EXPECT_PRED_FORMAT2(typed_comparator<decltype(ref)>::compare, ref, val)
+
+// Harness for randomized deconvolution tests: generates random input and
+// weights, builds a network around a single "deconv" primitive, runs it, and
+// checks every output element against reference_deconvolution().
+template <typename InputT, typename WeightsT, typename OutputT>
+class deconvolution_random_test_base {
+public:
+    // Copies a logical b/f/z/y/x 5-D vector into device memory, honoring the
+    // memory's actual layout via get_linear_offset().
+    template <typename T>
+    void set_memory(cldnn::memory& mem, const VVVVVF<T>& data) {
+        auto ptr = mem.pointer<T>();
+
+        auto b = data.size();
+        auto f = data[0].size();
+        auto z = data[0][0].size();
+        auto y = data[0][0][0].size();
+        auto x = data[0][0][0][0].size();
+
+        for (size_t bi = 0; bi < b; ++bi) {
+            for (size_t fi = 0; fi < f; ++fi) {
+                for (size_t zi = 0; zi < z; ++zi) {
+                    for (size_t yi = 0; yi < y; ++yi) {
+                        for (size_t xi = 0; xi < x; ++xi) {
+                            auto coords = cldnn::tensor(batch(bi), feature(fi), spatial(xi, yi, zi, 0));
+                            auto offset = mem.get_layout().get_linear_offset(coords);
+                            ptr[offset] = data[bi][fi][zi][yi][xi];
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Same as set_memory but for grouped weights, g/o/i/z/y/x order.
+    template <typename T>
+    void set_memory_weights(cldnn::memory& mem, const VVVVVVF<T>& data) {
+        auto ptr = mem.pointer<T>();
+
+        auto g = data.size();
+        auto b = data[0].size();
+        auto f = data[0][0].size();
+        auto z = data[0][0][0].size();
+        auto y = data[0][0][0][0].size();
+        auto x = data[0][0][0][0][0].size();
+
+        for (size_t gi = 0; gi < g; ++gi) {
+            for (size_t bi = 0; bi < b; ++bi) {
+                for (size_t fi = 0; fi < f; ++fi) {
+                    for (size_t zi = 0; zi < z; ++zi) {
+                        for (size_t yi = 0; yi < y; ++yi) {
+                            for (size_t xi = 0; xi < x; ++xi) {
+                                auto coords = cldnn::tensor(group(gi), batch(bi), feature(fi), spatial(xi, yi, zi, 0));
+                                auto offset = mem.get_layout().get_linear_offset(coords);
+                                ptr[offset] = data[gi][bi][fi][zi][yi][xi];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Random 5-D data (b, f, z, y, x) within the per-type value range.
+    template <typename T>
+    VVVVVF<T> generate_random(cldnn::tensor size) {
+        return generate_random_5d<T>(
+            size.batch[0],
+            size.feature[0],
+            size.spatial[2],
+            size.spatial[1],
+            size.spatial[0],
+            type_test_ranges<T>::min,
+            type_test_ranges<T>::max);
+    }
+
+    // Random 6-D grouped weights (g, o, i, z, y, x) within the per-type range.
+    template <typename T>
+    VVVVVVF<T> generate_random_weights(cldnn::tensor size) {
+        return generate_random_6d<T>(
+            size.group[0],
+            size.batch[0],
+            size.feature[0],
+            size.spatial[2],
+            size.spatial[1],
+            size.spatial[0],
+            type_test_ranges<T>::min,
+            type_test_ranges<T>::max);
+    }
+
+    // Builds, executes and validates one test case described by `params`.
+    void run(cldnn::engine eng, const deconvolution_random_test_params& params, cldnn::build_options build_opts) {
+        uint32_t groups = params.weights_size.group[0];
+        size_t ifm = params.weights_size.feature[0];  // input features per group
+        size_t ofm = params.weights_size.batch[0];    // output features per group
+
+        auto input_data = generate_random<InputT>(params.input_size);
+        auto weights_data = generate_random_weights<WeightsT>(params.weights_size);
+
+        auto in_layout = cldnn::layout(cldnn::type_to_data_type<InputT>::value, params.input_format, params.input_size);
+        auto wei_layout = cldnn::layout(cldnn::type_to_data_type<WeightsT>::value, params.weights_format, params.weights_size);
+
+        auto wei_mem = cldnn::memory::allocate(eng, wei_layout);
+        auto in_mem = cldnn::memory::allocate(eng, in_layout);
+
+        this->set_memory_weights(wei_mem, weights_data);
+        this->set_memory(in_mem, input_data);
+
+        auto topo = cldnn::topology(
+            cldnn::input_layout("input", in_layout),
+            cldnn::data("weights", wei_mem)
+        );
+
+        // Left empty when with_bias is false; the reference then uses 0 bias.
+        VF<OutputT> bias_data;
+
+        if (params.with_bias) {
+            auto bias_size = cldnn::tensor(feature(params.weights_size.batch[0] * params.weights_size.group[0]));
+            auto bias_lay = cldnn::layout(cldnn::type_to_data_type<OutputT>::value, cldnn::format::bfyx, bias_size);
+            auto bias_mem = cldnn::memory::allocate(eng, bias_lay);
+            bias_data = generate_random_1d<OutputT>(bias_lay.size.feature[0], -1, 1);
+            set_values(bias_mem, bias_data);
+            topo.add(cldnn::data("bias", bias_mem));
+            topo.add(cldnn::deconvolution("deconv", "input", { "weights" }, { "bias" }, groups, params.strides, params.input_offset));
+        } else {
+            topo.add(cldnn::deconvolution("deconv", "input", { "weights" }, groups, params.strides, params.input_offset));
+        }
+
+        // Optionally force a specific kernel and/or output format.
+        if (!params.deconv_desc.kernel_name.empty() || params.deconv_desc.output_format != cldnn::format::any) {
+            build_opts.set_option(cldnn::build_option::force_implementations({ { "deconv", params.deconv_desc } }));
+        }
+
+        auto net = cldnn::network(eng, topo, build_opts);
+        net.set_input_data("input", in_mem);
+
+        auto result = net.execute();
+
+        // Record which kernel actually ran, for diagnostics on mismatch.
+        std::string kernel;
+        for (auto i : net.get_primitives_info()) {
+            if (i.original_id == "deconv")
+                kernel = i.kernel_id;
+        }
+
+        auto out_mem = result.at("deconv").get_memory();
+
+        // Compare results
+        {
+            auto ptr = out_mem.pointer<OutputT>();
+
+            auto b = static_cast<size_t>(out_mem.get_layout().size.batch[0]);
+            auto of = static_cast<size_t>(out_mem.get_layout().size.feature[0]);
+
+            for (size_t bi = 0; bi < b; ++bi) {
+                for (size_t fi = 0; fi < of; ++fi) {
+                    // Output feature fi belongs to group fi / ofm and reads
+                    // the input feature slice starting at group * ifm.
+                    size_t group = fi / ofm;
+                    auto reference = reference_deconvolution<InputT, WeightsT, OutputT>(
+                        input_data[bi],
+                        weights_data[group][fi % ofm],
+                        bias_data.empty() ? 0.f : static_cast<float>(bias_data[fi]),
+                        params.strides,
+                        params.input_offset,
+                        group * ifm);
+
+                    ASSERT_EQ(reference.size(), out_mem.get_layout().size.spatial[2]);
+                    ASSERT_EQ(reference[0].size(), out_mem.get_layout().size.spatial[1]);
+                    ASSERT_EQ(reference[0][0].size(), out_mem.get_layout().size.spatial[0]);
+
+                    for (size_t zi = 0; zi < reference.size(); zi++) {
+                        for (size_t yi = 0; yi < reference[0].size(); yi++) {
+                            for (size_t xi = 0; xi < reference[0][0].size(); xi++) {
+                                auto ref_val = reference[zi][yi][xi];
+                                auto out_coords = cldnn::tensor(batch(bi), feature(fi), spatial(xi, yi, zi, 0));
+                                auto out_offset = out_mem.get_layout().get_linear_offset(out_coords);
+                                auto out_val = ptr[out_offset];
+                                TYPED_ASSERT_EQ(ref_val, out_val)
+                                    << "at b=" << bi << ", f=" << fi << ", z=" << zi << ", y=" << yi << ", x=" << xi << std::endl
+                                    << " kernel: " << kernel;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+};
+
+#undef TYPED_ASSERT_EQ
+#undef TYPED_EXPECT_EQ
+
+// Parameterized fixture: translates the runtime data_types carried by the test
+// params into the matching compile-time <InputT, WeightsT, OutputT>
+// instantiation of deconvolution_random_test_base and executes it.
+class deconvolution_random_test : public testing::TestWithParam<deconvolution_random_test_params> {
+protected:
+ void SetUp() override {
+ eng = get_test_engine();
+ build_opts.set_option(cldnn::build_option::optimize_data(true));
+ }
+
+ // Entry point used by TEST_P: selects the input element type.
+ // Unsupported input types fall through the default case and run nothing.
+ void run() {
+ auto params = GetParam();
+ switch (params.input_type) {
+ case data_types::f32:
+ run_typed_in<float>();
+ break;
+ case data_types::f16:
+ run_typed_in<FLOAT16>();
+ break;
+ case data_types::i8:
+ run_typed_in<int8_t>();
+ break;
+ case data_types::u8:
+ run_typed_in<uint8_t>();
+ break;
+ default:
+ break;
+ }
+ }
+
+ cldnn::engine eng;
+ cldnn::build_options build_opts;
+
+private:
+ // Fully-typed leaf: instantiates the test base and runs it.
+ template <typename InputT, typename WeightsT, typename OutputT>
+ void run_typed() {
+ auto& params = GetParam();
+ deconvolution_random_test_base<InputT, WeightsT, OutputT> test;
+ test.run(eng, params, build_opts);
+ }
+
+ // Selects the output element type (f32/f16 only).
+ template <typename InputT, typename WeightsT>
+ void run_typed_in_wei() {
+ auto& params = GetParam();
+ switch (params.output_type) {
+ case data_types::f32:
+ run_typed<InputT, WeightsT, float>();
+ break;
+ case data_types::f16:
+ run_typed<InputT, WeightsT, FLOAT16>();
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Selects the weights element type.
+ template <typename InputT>
+ void run_typed_in() {
+ auto& params = GetParam();
+ switch (params.weights_type) {
+ case data_types::f32:
+ run_typed_in_wei<InputT, float>();
+ break;
+ case data_types::f16:
+ run_typed_in_wei<InputT, FLOAT16>();
+ break;
+ case data_types::i8:
+ run_typed_in_wei<InputT, int8_t>();
+ break;
+ case data_types::u8:
+ run_typed_in_wei<InputT, uint8_t>();
+ break;
+ default:
+ break;
+ }
+ }
+};
+
+// Fluent builder of parameter lists for the random deconvolution tests.
+// Each add_* method appends a family of cases (varied batch, stride, padding,
+// grouping) for one data-type/format combination and returns *this so calls
+// can be chained inside INSTANTIATE_TEST_CASE_P.
+class deconvolution_random_test_params_generator : public std::vector<deconvolution_random_test_params> {
+public:
+ using self = deconvolution_random_test_params_generator;
+ // Appends a single explicit case.
+ self& add(const deconvolution_random_test_params& params) {
+ push_back(params);
+ return *this;
+ }
+
+ // Small 2D cases: 1x1/3x3 kernels, grouped and depthwise, batches 1 and 2.
+ self& add_smoke_2d(data_types in_dt, data_types wei_dt, data_types out_dt, format::type in_fmt, format::type out_fmt) {
+ std::vector<int> batches = { 1, 2 };
+ for (auto b : batches) {
+ // 1x1
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 1, 1}, tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 1, 1}, {1, 1, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ // 3x3
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 3, 3}, tensor(1), {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 3, 3}, {1, 1, 2, 2}, {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Grouped
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(1, 1)), tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(1, 1)), {1, 1, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(3, 3)), tensor(1), {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(3, 3)), {1, 1, 2, 2}, {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Depthwise
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(1, 1)), tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(1, 1)), {1, 1, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(3, 3)), tensor(1), {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(3, 3)), {1, 1, 2, 2}, {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+
+ }
+ return *this;
+ }
+
+ // 3D analogue of add_smoke_2d (z dimension added, oizyx/goizyx weights).
+ self& add_smoke_3d(data_types in_dt, data_types wei_dt, data_types out_dt, format::type in_fmt, format::type out_fmt) {
+ std::vector<int> batches = { 1, 2 };
+ for (auto b : batches) {
+ // 1x1
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 1, 1, 1}, tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 1, 1, 1}, {1, 1, 2, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ // 3x3
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 3, 3, 3}, tensor(1), {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 3, 3, 3}, {1, 1, 2, 2, 2}, {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Grouped
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(1, 1, 1)), tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(1, 1, 1)), {1, 1, 2, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(3, 3, 3)), tensor(1), {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(3, 3, 3)), {1, 1, 2, 2, 2}, {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Depthwise
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(1, 1, 1)), tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(1, 1, 1)), {1, 1, 2, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(3, 3, 3)), tensor(1), {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(3, 3, 3)), {1, 1, 2, 2, 2}, {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ }
+ return *this;
+ }
+
+ // Larger 2D cases (batch up to 16, asymmetric kernels, uneven groups).
+ self& add_extra_2d(data_types in_dt, data_types wei_dt, data_types out_dt, format::type in_fmt, format::type out_fmt) {
+ std::vector<int> batches = { 1, 2, 16 };
+ for (auto b : batches) {
+ // 1x1
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 1, 1}, tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 1, 1}, {1, 1, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ // 3x3
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 3}, tensor(1), {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 3}, {1, 1, 2, 2}, {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Asymmetric weights
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 2}, tensor(1), {0, 0, 0, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 2}, {1, 1, 2, 2}, {0, 0, 0, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Uneven groups
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(1, 1)), tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(1, 1)), {1, 1, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(3, 3)), tensor(1), {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(3, 3)), {1, 1, 2, 2}, {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ }
+ return *this;
+ }
+
+ // 3D analogue of add_extra_2d.
+ self& add_extra_3d(data_types in_dt, data_types wei_dt, data_types out_dt, format::type in_fmt, format::type out_fmt) {
+ std::vector<int> batches = { 1, 2, 16 };
+ for (auto b : batches) {
+ // 1x1
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 1, 1, 1}, tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 1, 1, 1}, {1, 1, 2, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ // 3x3
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 3, 3}, tensor(1), {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 3, 3}, {1, 1, 2, 2, 2}, {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Asymmetric weights
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 2, 4}, tensor(1), {0, 0, 0, -1, -2}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 2, 4}, {1, 1, 2, 2, 2}, {0, 0, 0, -1, -2}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Uneven groups
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(1, 1, 1)), tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(1, 1, 1)), {1, 1, 2, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(3, 3, 3)), tensor(1), {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(3, 3, 3)), {1, 1, 2, 2, 2}, {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ }
+ return *this;
+ }
+
+ // Convenience: smoke + extra 2D cases for one type/format combination.
+ self& add_all_2d(data_types in_dt, data_types wei_dt, data_types out_dt, format::type in_fmt, format::type out_fmt) {
+ return add_smoke_2d(in_dt, wei_dt, out_dt, in_fmt, out_fmt)
+ .add_extra_2d(in_dt, wei_dt, out_dt, in_fmt, out_fmt);
+ }
+
+ // Convenience: smoke + extra 3D cases for one type/format combination.
+ self& add_all_3d(data_types in_dt, data_types wei_dt, data_types out_dt, format::type in_fmt, format::type out_fmt) {
+ return add_smoke_3d(in_dt, wei_dt, out_dt, in_fmt, out_fmt)
+ .add_extra_3d(in_dt, wei_dt, out_dt, in_fmt, out_fmt);
+ }
+};
+
+// Single parameterized test; all behavior lives in the fixture's run().
+TEST_P(deconvolution_random_test, basic) {
+ run();
+}
+
+// Smoke suite: small shapes over f32/f16/i8 in plain and fsv16-blocked layouts.
+INSTANTIATE_TEST_CASE_P(smoke, deconvolution_random_test, testing::ValuesIn(
+ deconvolution_random_test_params_generator()
+ .add_smoke_2d(data_types::f32, data_types::f32, data_types::f32, format::bfyx, format::any)
+ .add_smoke_3d(data_types::f32, data_types::f32, data_types::f32, format::bfzyx, format::any)
+ .add_smoke_2d(data_types::f32, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+ .add_smoke_3d(data_types::f32, data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16)
+
+ .add_smoke_2d(data_types::f16, data_types::f16, data_types::f16, format::bfyx, format::any)
+ .add_smoke_3d(data_types::f16, data_types::f16, data_types::f16, format::bfzyx, format::any)
+ .add_smoke_2d(data_types::f16, data_types::f16, data_types::f16, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+ .add_smoke_3d(data_types::f16, data_types::f16, data_types::f16, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16)
+
+ .add_smoke_2d(data_types::i8, data_types::i8, data_types::f32, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+ .add_smoke_3d(data_types::i8, data_types::i8, data_types::f32, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16)
+), deconvolution_random_test_params::print_params);
+
+// Extended suite (DISABLED_ prefix: skipped by default, run via
+// --gtest_also_run_disabled_tests): larger batches, asymmetric kernels,
+// uneven groups, u8 inputs, and bsv16_fsv16 double-blocked layouts.
+INSTANTIATE_TEST_CASE_P(DISABLED_extended, deconvolution_random_test, testing::ValuesIn(
+ deconvolution_random_test_params_generator()
+ .add_extra_2d(data_types::f32, data_types::f32, data_types::f32, format::bfyx, format::any)
+ .add_extra_3d(data_types::f32, data_types::f32, data_types::f32, format::bfzyx, format::any)
+ .add_extra_2d(data_types::f32, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+ .add_extra_3d(data_types::f32, data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16)
+
+ .add_extra_2d(data_types::f16, data_types::f16, data_types::f16, format::bfyx, format::any)
+ .add_extra_3d(data_types::f16, data_types::f16, data_types::f16, format::bfzyx, format::any)
+ .add_extra_2d(data_types::f16, data_types::f16, data_types::f16, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+ .add_extra_3d(data_types::f16, data_types::f16, data_types::f16, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16)
+
+ .add_extra_2d(data_types::i8, data_types::i8, data_types::f32, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+ .add_all_2d(data_types::u8, data_types::i8, data_types::f32, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+ .add_extra_3d(data_types::i8, data_types::i8, data_types::f32, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16)
+ .add_all_3d(data_types::u8, data_types::i8, data_types::f32, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16)
+
+ .add_all_2d(data_types::i8, data_types::i8, data_types::f32, format::bs_fs_yx_bsv16_fsv16, format::bs_fs_yx_bsv16_fsv16)
+ .add_all_2d(data_types::u8, data_types::i8, data_types::f32, format::bs_fs_yx_bsv16_fsv16, format::bs_fs_yx_bsv16_fsv16)
+ .add_all_3d(data_types::i8, data_types::i8, data_types::f32, format::bs_fs_zyx_bsv16_fsv16, format::bs_fs_zyx_bsv16_fsv16)
+ .add_all_3d(data_types::u8, data_types::i8, data_types::f32, format::bs_fs_zyx_bsv16_fsv16, format::bs_fs_zyx_bsv16_fsv16)
+), deconvolution_random_test_params::print_params);
#include "api/data.hpp"
#include "api/resample.hpp"
#include "api/mvn.hpp"
+#include "api/deconvolution.hpp"
#include "test_utils/test_utils.h"
size_t reorders_count_fused = get_reorders_count(fused);
size_t reorders_count_not_fused = get_reorders_count(not_fused);
+ std::stringstream description;
+ description << std::endl << "not fused: " << std::endl;
+ for (auto i : not_fused.get_primitives_info()) {
+ description << " " << i.original_id << " " << i.kernel_id << std::endl;
+ }
+ description << "fused: " << std::endl;
+ for (auto i : fused.get_primitives_info()) {
+ description << " " << i.original_id << " " << i.kernel_id << std::endl;
+ }
+ SCOPED_TRACE(description.str());
+
// Subtract reorders count to handle execution in different layouts when input/output reorders can be added in the graph
ASSERT_EQ(fused.get_executed_primitives().size() - reorders_count_fused, p.expected_fused_primitives);
ASSERT_EQ(not_fused.get_executed_primitives().size() - reorders_count_not_fused, p.expected_not_fused_primitives);
#define CASE_CONV_FP32_10 {32, 16, 4, 5, 4}, {32, 32, 4, 5, 4}, {1, 1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::bs_fs_zyx_bsv16_fsv16, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_11 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_12 {1, 16, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
+#define CASE_CONV_FP32_13 {1, 16, 18, 5, 4}, {1, 16, 16, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP16_1 {1, 15, 4, 5}, {1, 30, 2, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::bfyx, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_CONV_FP16_2 {1, 16, 4, 5}, {1, 32, 2, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::os_is_yx_isv16_osv16, data_types::f16, format::bfyx
bc_test_params{CASE_CONV_FP32_9, 2, 3},
bc_test_params{CASE_CONV_FP32_11, 2, 3},
bc_test_params{CASE_CONV_FP32_12, 2, 3},
+ // bc_test_params{CASE_CONV_FP32_13, 2, 3}, - leads to mvn_scale_activation_quantize_i8_eltwise_fp32_quantize_i8.basic/11 test failure
bc_test_params{CASE_CONV_FP16_6, 2, 3},
bc_test_params{CASE_CONV_FP16_7, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_4, 2, 4},
}), );
+// Fusion test: convolution -> relu -> quantize, checking that the activation
+// and quantize nodes fuse into the convolution (primitive counts come from
+// the bc_test_params used to instantiate the suite).
+class conv_int8_relu_quantize : public ConvFusingTest {};
+// Variant quantizing the fused output to signed int8 in [-127, 127].
+TEST_P(conv_int8_relu_quantize, i8) {
+ auto p = GetParam();
+ create_topologies(input_layout("input", get_input_layout(p)),
+ data("weights", get_mem(get_weights_layout(p))),
+ data("bias", get_mem(get_bias_layout(p))),
+ data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
+ data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
+ data("out_lo", get_mem(get_single_element_layout(p), -127)),
+ data("out_hi", get_mem(get_single_element_layout(p), 127)),
+ convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation),
+ activation("relu", "conv_prim", activation_func::relu),
+ quantize("quantize", "relu", "in_lo", "in_hi", "out_lo", "out_hi", 256, data_types::i8),
+ reorder("reorder_bfyx", "quantize", p.default_format, data_types::f32)
+ );
+ // Output elements are in range [-127, 127]
+ // 1.0f difference is allowed, since quantize can return different values in ref and scale_shift kernels
+ // due to big error of division (in ref kernel).
+ tolerance = 1.0f;
+ execute(p);
+}
+
+// Variant quantizing the fused output to unsigned int8 in [0, 255].
+TEST_P(conv_int8_relu_quantize, u8) {
+ auto p = GetParam();
+ create_topologies(input_layout("input", get_input_layout(p)),
+ data("weights", get_mem(get_weights_layout(p))),
+ data("bias", get_mem(get_bias_layout(p))),
+ data("in_lo", get_mem(get_per_channel_layout(p), 0)),
+ data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
+ data("out_lo", get_mem(get_single_element_layout(p), 0)),
+ data("out_hi", get_mem(get_single_element_layout(p), 255)),
+ convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation),
+ activation("relu", "conv_prim", activation_func::relu),
+ quantize("quantize", "relu", "in_lo", "in_hi", "out_lo", "out_hi", 256, data_types::u8),
+ reorder("reorder_bfyx", "quantize", p.default_format, data_types::f32)
+ );
+ // 1.0f slack: ref and scale_shift quantize kernels may differ by one step.
+ tolerance = 1.0f;
+ execute(p);
+}
+
+// Runs the relu+quantize fusion tests over the u8/s8 2D and 3D conv cases;
+// each entry carries (params, expected fused, expected not-fused) counts.
+INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_relu_quantize,
+ ::testing::ValuesIn(std::vector<bc_test_params>{
+ bc_test_params{CASE_CONV_U8S8_1, 2, 4},
+ bc_test_params{CASE_CONV_U8S8_2, 2, 4},
+ bc_test_params{CASE_CONV_U8S8_3, 2, 4},
+ bc_test_params{CASE_CONV_U8S8_4, 2, 4},
+ bc_test_params{CASE_CONV_S8S8_1, 2, 4},
+ bc_test_params{CASE_CONV_S8S8_2, 2, 4},
+ bc_test_params{CASE_CONV_S8S8_3, 2, 4},
+ bc_test_params{CASE_CONV_S8S8_4, 2, 4},
+
+ bc_test_params{CASE_CONV3D_U8S8_1, 2, 4},
+ bc_test_params{CASE_CONV3D_U8S8_2, 2, 4},
+ bc_test_params{CASE_CONV3D_U8S8_3, 2, 4},
+ bc_test_params{CASE_CONV3D_U8S8_4, 2, 4},
+ bc_test_params{CASE_CONV3D_S8S8_1, 2, 4},
+ bc_test_params{CASE_CONV3D_S8S8_2, 2, 4},
+ bc_test_params{CASE_CONV3D_S8S8_3, 2, 4},
+ bc_test_params{CASE_CONV3D_S8S8_4, 2, 4},
+ }), );
+
class conv_int8_scale_activation_quantize_i8 : public ConvFusingTest {};
TEST_P(conv_int8_scale_activation_quantize_i8, basic) {
auto p = GetParam();
lrn_test_params{CASE_LRN_FP16_5, 2, 4, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features"},
}), );
+
+/* ----------------------------------------------------------------------------------------------------- */
+/* -------------------------------- Activation cases --------------------------------------------------- */
+/* ----------------------------------------------------------------------------------------------------- */
+struct activation_test_params {
+ tensor input_size;
+ data_types input_type;
+ format input_format;
+ activation_func activation_function;
+ activation_additional_params additional_params;
+ data_types default_type;
+ format default_format;
+ size_t expected_fused_primitives;
+ size_t expected_not_fused_primitives;
+};
+
+#define CASE_ACTIVATION_F32_1 {1, 16, 8, 8}, data_types::f32, format::bfyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F32_2 {1, 16, 8, 8}, data_types::f32, format::bfyx, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F32_3 {1, 16, 8, 8}, data_types::f32, format::bfyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F32_4 {1, 16, 8, 8}, data_types::f32, format::b_fs_yx_fsv16, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F32_5 {1, 16, 8, 8}, data_types::f32, format::b_fs_yx_fsv16, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F32_6 {1, 16, 8, 8}, data_types::f32, format::b_fs_yx_fsv16, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_3D_F32_1 {1, 16, 8, 8, 8}, data_types::f32, format::bfzyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_3D_F32_2 {1, 16, 8, 8, 8}, data_types::f32, format::bfzyx, activation_func::hard_sigmoid, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_3D_F32_3 {1, 16, 8, 8, 8}, data_types::f32, format::bfzyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F16_1 {1, 16, 8, 8}, data_types::f16, format::bfyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F16_2 {1, 16, 8, 8}, data_types::f16, format::bfyx, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F16_3 {1, 16, 8, 8}, data_types::f16, format::bfyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F16_4 {1, 16, 8, 8}, data_types::f16, format::b_fs_yx_fsv16, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F16_5 {1, 16, 8, 8}, data_types::f16, format::b_fs_yx_fsv16, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F16_6 {1, 16, 8, 8}, data_types::f16, format::b_fs_yx_fsv16, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_3D_F16_1 {1, 16, 8, 8, 8}, data_types::f16, format::bfzyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_3D_F16_2 {1, 16, 8, 8, 8}, data_types::f16, format::bfzyx, activation_func::hard_sigmoid, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_3D_F16_3 {1, 16, 8, 8, 8}, data_types::f16, format::bfzyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_U8_1 {1, 16, 8, 8}, data_types::u8, format::bfyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_U8_2 {1, 16, 8, 8}, data_types::u8, format::b_fs_yx_fsv16, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_3D_U8_1 {1, 16, 8, 8, 8}, data_types::u8, format::bfzyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+
+class ActivationFusingTest : public ::BaseFusingTest<activation_test_params> {
+public:
+ void execute(activation_test_params& p) {
+ auto input_prim = get_mem(get_input_layout(p));
+
+ network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
+ network network_fused(this->engine, this->topology_fused, bo_fused);
+
+ network_fused.set_input_data("input", input_prim);
+ network_not_fused.set_input_data("input", input_prim);
+
+ compare(network_not_fused, network_fused, p);
+ }
+
+ layout get_input_layout(activation_test_params& p) { return layout{p.input_type, p.input_format, p.input_size}; }
+
+ layout get_per_channel_layout(activation_test_params& p) {
+ return layout{p.default_type, p.default_format, tensor{1, p.input_size.feature[0], 1, 1}};
+ }
+ activation_func get_activation_function(activation_test_params& p) { return p.activation_function; }
+ activation_additional_params get_activation_additional_params(activation_test_params& p) { return p.additional_params; }
+ format get_input_format(activation_test_params &p) { return p.input_format; }
+};
+
+class activation_quantize_i8 : public ActivationFusingTest {};
+TEST_P(activation_quantize_i8, basic) {
+ auto p = GetParam();
+ create_topologies(input_layout("input", get_input_layout(p)),
+ activation("act", "input", get_activation_function(p), get_activation_additional_params(p)),
+ data("in_low", get_mem(get_per_channel_layout(p), min_random, 0)),
+ data("in_high", get_mem(get_per_channel_layout(p), 1, max_random)),
+ data("out_low", get_mem(get_single_element_layout(p), -127, 0)),
+ data("out_high", get_mem(get_single_element_layout(p), 0, 127)),
+ quantize("quant", "act", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8),
+ reorder("reorder_bfyx", "quant", format::bfyx, data_types::f32));
+
+ tolerance = 1.f;
+ execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(
+ fusings_gpu,
+ activation_quantize_i8,
+ ::testing::ValuesIn(std::vector<activation_test_params>{
+ activation_test_params{CASE_ACTIVATION_F32_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F32_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F32_3, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F32_4, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F32_5, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F32_6, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F32_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F32_3, 2, 3}
+ }), );
+
+INSTANTIATE_TEST_CASE_P(
+ DISABLED_fusings_gpu,
+ activation_quantize_i8,
+ ::testing::ValuesIn(std::vector<activation_test_params>{
+ // fp16 cases
+ activation_test_params{CASE_ACTIVATION_F16_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F16_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F16_3, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F16_4, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F16_5, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F16_6, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F16_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F16_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F16_3, 2, 3},
+
+ // u8 cases
+ activation_test_params{CASE_ACTIVATION_U8_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_U8_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_U8_1, 2, 3}
+ }), );
+
+class activation_opt_quantize_i8 : public ActivationFusingTest {};
+TEST_P(activation_opt_quantize_i8, basic) {
+ auto p = GetParam();
+ implementation_desc activation_impl = {get_input_format(p), "activation_opt"};
+ this->bo_fused.set_option(build_option::force_implementations({{"act", activation_impl}}));
+
+ create_topologies(input_layout("input", get_input_layout(p)),
+ activation("act", "input", get_activation_function(p), get_activation_additional_params(p)),
+ data("in_low", get_mem(get_per_channel_layout(p), min_random, 0)),
+ data("in_high", get_mem(get_per_channel_layout(p), 1, max_random)),
+ data("out_low", get_mem(get_single_element_layout(p), -127, 0)),
+ data("out_high", get_mem(get_single_element_layout(p), 0, 127)),
+ quantize("quant", "act", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8),
+ reorder("reorder_bfyx", "quant", format::bfyx, data_types::f32));
+
+ tolerance = 1.f;
+ execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(
+ fusings_gpu,
+ activation_opt_quantize_i8,
+ ::testing::ValuesIn(std::vector<activation_test_params>{
+ activation_test_params{CASE_ACTIVATION_F32_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F32_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F32_3, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F32_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F32_3, 2, 3}
+ }), );
+
+INSTANTIATE_TEST_CASE_P(
+ DISABLED_fusings_gpu,
+ activation_opt_quantize_i8,
+ ::testing::ValuesIn(std::vector<activation_test_params>{
+ // fp16 cases
+ activation_test_params{CASE_ACTIVATION_F16_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F16_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F16_3, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F16_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F16_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F16_3, 2, 3},
+
+ // u8 cases
+ activation_test_params{CASE_ACTIVATION_U8_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_U8_1, 2, 3}
+ }), );
+
+/* ----------------------------------------------------------------------------------------------------- */
+/* --------------------------------------- Deconvolution cases ----------------------------------------- */
+/* ----------------------------------------------------------------------------------------------------- */
+using deconv_test_params = bc_test_params;
+
+// in_shape; out_shape; kernel; stride; pad; dilation; groups; data_type; input_format; weights_type; weights_format; default_type; default_format;
+#define CASE_DECONV_FP32_1 {1, 15, 4, 5}, {1, 30, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::bfyx, data_types::f32, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_FP32_2 {1, 16, 4, 5}, {1, 32, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::is_os_yx_osv16_isv16, data_types::f32, format::bfyx
+#define CASE_DECONV_FP32_3 {1, 16, 4, 5}, {1, 32, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::is_os_yx_osv16_isv16, data_types::f32, format::bfyx
+#define CASE_DECONV_FP32_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::gs_oiyx_gsv16, data_types::f32, format::bfyx
+#define CASE_DECONV_FP32_5 {1, 15, 4, 5}, {1, 30, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f32, format::bfyx, data_types::f32, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_FP32_6 {1, 16, 4, 5}, {1, 32, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::is_os_yx_osv16_isv16, data_types::f32, format::bfyx
+#define CASE_DECONV_FP32_7 {1, 16, 4, 5}, {1, 32, 7, 9}, {1, 1, 1, 1}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::is_os_yx_osv16_isv16, data_types::f32, format::bfyx
+#define CASE_DECONV_FP32_8 {1, 32, 4, 5}, {1, 32, 7, 9}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::gs_oiyx_gsv16, data_types::f32, format::bfyx
+
+#define CASE_DECONV_FP16_1 {1, 15, 4, 5}, {1, 30, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::bfyx, data_types::f16, format::oiyx, data_types::f16, format::bfyx
+#define CASE_DECONV_FP16_2 {1, 16, 4, 5}, {1, 32, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::is_os_yx_osv16_isv16, data_types::f16, format::bfyx
+#define CASE_DECONV_FP16_3 {1, 16, 4, 5}, {1, 32, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::is_os_yx_osv16_isv16, data_types::f16, format::bfyx
+#define CASE_DECONV_FP16_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::gs_oiyx_gsv16, data_types::f16, format::bfyx
+#define CASE_DECONV_FP16_5 {1, 15, 4, 5}, {1, 30, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f16, format::bfyx, data_types::f16, format::oiyx, data_types::f16, format::bfyx
+#define CASE_DECONV_FP16_6 {1, 16, 4, 5}, {1, 32, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::is_os_yx_osv16_isv16, data_types::f16, format::bfyx
+#define CASE_DECONV_FP16_7 {1, 16, 4, 5}, {1, 32, 7, 9}, {1, 1, 1, 1}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::is_os_yx_osv16_isv16, data_types::f16, format::bfyx
+#define CASE_DECONV_FP16_8 {1, 32, 4, 5}, {1, 32, 7, 9}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::gs_oiyx_gsv16, data_types::f16, format::bfyx
+
+#define CASE_DECONV_S8S8_1 {1, 15, 4, 5}, {1, 30, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_S8S8_2 {1, 16, 4, 5}, {1, 32, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_S8S8_3 {1, 16, 4, 5}, {1, 32, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_S8S8_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_S8S8_5 {1, 15, 4, 5}, {1, 30, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_S8S8_6 {1, 16, 4, 5}, {1, 32, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_S8S8_7 {1, 16, 4, 5}, {1, 32, 7, 9}, {1, 1, 1, 1}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_S8S8_8 {1, 32, 4, 5}, {1, 32, 7, 9}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
+
+#define CASE_DECONV_U8S8_1 {1, 15, 4, 5}, {1, 30, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_U8S8_2 {1, 16, 4, 5}, {1, 32, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_U8S8_3 {1, 16, 4, 5}, {1, 32, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_U8S8_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_U8S8_5 {1, 15, 4, 5}, {1, 30, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_U8S8_6 {1, 16, 4, 5}, {1, 32, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_U8S8_7 {1, 16, 4, 5}, {1, 32, 7, 9}, {1, 1, 1, 1}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_U8S8_8 {1, 32, 4, 5}, {1, 32, 7, 9}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
+
+// 3D
+// in_shape; out_shape; kernel; stride; pad; dilation; groups; data_type; input_format; weights_type; weights_format; default_type; default_format;
+#define CASE_DECONV_FP32_3D_1 {1, 15, 4, 5, 3}, {1, 30, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::bfzyx, data_types::f32, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_2 {1, 16, 4, 5, 3}, {1, 32, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::is_os_zyx_osv16_isv16, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_3 {1, 16, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::is_os_zyx_osv16_isv16, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_4 {1, 32, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::gs_oizyx_gsv16, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_5 {1, 15, 4, 5, 3}, {1, 30, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f32, format::bfzyx, data_types::f32, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_6 {1, 16, 4, 5, 3}, {1, 32, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::is_os_zyx_osv16_isv16, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_7 {1, 16, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 1, 1, 1}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::is_os_zyx_osv16_isv16, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_8 {1, 32, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::gs_oizyx_gsv16, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_9 {16, 16, 4, 5, 3}, {16, 32, 7, 9, 5}, {1, 1, 1, 1, 1}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f32, format::bs_fs_zyx_bsv16_fsv16, data_types::f32, format::is_os_zyx_osv16_isv16, data_types::f32, format::bfzyx
+
+#define CASE_DECONV_FP16_3D_1 {1, 15, 4, 5, 3}, {1, 30, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::bfzyx, data_types::f16, format::oizyx, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_2 {1, 16, 4, 5, 3}, {1, 32, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::is_os_zyx_osv16_isv16, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_3 {1, 16, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::is_os_zyx_osv16_isv16, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_4 {1, 32, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::gs_oizyx_gsv16, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_5 {1, 15, 4, 5, 3}, {1, 30, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f16, format::bfzyx, data_types::f16, format::oizyx, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_6 {1, 16, 4, 5, 3}, {1, 32, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::is_os_zyx_osv16_isv16, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_7 {1, 16, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 1, 1, 1}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::is_os_zyx_osv16_isv16, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_8 {1, 32, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::gs_oizyx_gsv16, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_9 {16, 16, 4, 5, 3}, {16, 32, 7, 9, 5}, {1, 1, 1, 1, 1}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f16, format::bs_fs_zyx_bsv16_fsv16, data_types::f16, format::is_os_zyx_osv16_isv16, data_types::f16, format::bfzyx
+
+#define CASE_DECONV_S8S8_3D_1 {1, 15, 4, 5, 3}, {1, 30, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfzyx, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_S8S8_3D_2 {1, 16, 4, 5, 3}, {1, 32, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_S8S8_3D_3 {1, 16, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_S8S8_3D_4 {1, 32, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::i8, format::b_fs_zyx_fsv16, data_types::i8, format::goizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_S8S8_3D_5 {1, 15, 4, 5, 3}, {1, 30, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfzyx, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_S8S8_3D_6 {1, 16, 4, 5, 3}, {1, 32, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_S8S8_3D_7 {1, 16, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 1, 1, 1}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_S8S8_3D_8 {1, 32, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::i8, format::b_fs_zyx_fsv16, data_types::i8, format::goizyx, data_types::f32, format::bfzyx
+
+#define CASE_DECONV_U8S8_3D_1 {1, 15, 4, 5, 3}, {1, 30, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfzyx, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_U8S8_3D_2 {1, 16, 4, 5, 3}, {1, 32, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_U8S8_3D_3 {1, 16, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_U8S8_3D_4 {1, 32, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::u8, format::b_fs_zyx_fsv16, data_types::i8, format::goizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_U8S8_3D_5 {1, 15, 4, 5, 3}, {1, 30, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfzyx, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_U8S8_3D_6 {1, 16, 4, 5, 3}, {1, 32, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_U8S8_3D_7 {1, 16, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 1, 1, 1}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_U8S8_3D_8 {1, 32, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::u8, format::b_fs_zyx_fsv16, data_types::i8, format::goizyx, data_types::f32, format::bfzyx
+
+class DeconvolutionFusingTest : public ::WeightsPrimitiveFusingTest {};
+
+class deconv_actv : public DeconvolutionFusingTest {};
+TEST_P(deconv_actv, basic) {
+ auto p = GetParam();
+ create_topologies(
+ input_layout("input", get_input_layout(p)),
+ data("weights", get_mem(get_weights_layout(p))),
+ deconvolution("deconv", "input", { "weights" }, p.groups, p.stride, p.pad),
+ activation("act", "deconv", activation_func::relu),
+ reorder("out", "act", p.default_format, data_types::f32)
+ );
+ // A much higher tolerance is needed because of the deconvolution -> convolution optimization
+ tolerance = 1.f;
+ execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_actv,
+ ::testing::ValuesIn(std::vector<deconv_test_params>{
+ deconv_test_params{ CASE_DECONV_FP32_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_8, 2, 3 },
+
+ deconv_test_params{ CASE_DECONV_FP16_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_8, 2, 3 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_8, 2, 3 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_8, 2, 3 },
+
+ deconv_test_params{ CASE_DECONV_FP32_3D_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_8, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_9, 2, 3 },
+
+ deconv_test_params{ CASE_DECONV_FP16_3D_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_8, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_9, 2, 3 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_8, 2, 3 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_3D_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_8, 2, 3 },
+}), );
+
+class deconv_actv_eltw_actv : public DeconvolutionFusingTest {};
+TEST_P(deconv_actv_eltw_actv, basic) {
+ auto p = GetParam();
+ create_topologies(
+ input_layout("input", get_input_layout(p)),
+ data("weights", get_mem(get_weights_layout(p))),
+ data("eltw_data", get_mem(get_output_layout(p))),
+ deconvolution("deconv", "input", { "weights" }, p.groups, p.stride, p.pad),
+ activation("act1", "deconv", activation_func::relu),
+ eltwise("eltw", {"act1", "eltw_data"}, eltwise_mode::sum),
+ activation("act2", "eltw", activation_func::relu),
+ reorder("out", "act2", p.default_format, data_types::f32)
+ );
+ // A much higher tolerance is needed because of the deconvolution -> convolution optimization
+ tolerance = 1.f;
+ execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_actv_eltw_actv,
+ ::testing::ValuesIn(std::vector<deconv_test_params>{
+ // Some fusions are disabled by the deconvolution -> convolution optimization
+ deconv_test_params{ CASE_DECONV_FP32_1, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_FP16_1, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_FP32_3D_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_8, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_9, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_FP16_3D_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_8, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_9, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_3D_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_8, 2, 5 },
+}), );
+
+class deconv_scale_actv_quant_i8 : public DeconvolutionFusingTest {};
+TEST_P(deconv_scale_actv_quant_i8, basic) {
+ auto p = GetParam();
+ create_topologies(
+ input_layout("input", get_input_layout(p)),
+ data("weights", get_mem(get_weights_layout(p))),
+ data("scale_data", get_mem(get_per_channel_layout(p), 1.f/p.kernel.count())),
+ data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
+ data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
+ data("out_lo", get_mem(get_single_element_layout(p), -127)),
+ data("out_hi", get_mem(get_single_element_layout(p), 127)),
+ deconvolution("deconv", "input", { "weights" }, p.groups, p.stride, p.pad),
+ scale("scale", "deconv", "scale_data"),
+ activation("actv", "scale", activation_func::softsign),
+ quantize("quant", "actv", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::i8),
+ reorder("out", "quant", p.default_format, data_types::f32)
+ );
+
+ tolerance = 1.f;
+ execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_i8,
+ ::testing::ValuesIn(std::vector<deconv_test_params>{
+ // Some fusings disabled under deconvolution -> convolution optimization
+ // Quantize fusing disabled for fp16/fp32 for performance reasons
+ // deconv_test_params{ CASE_DECONV_FP32_1, 4, 5 }, FIXME Failure due to activation + quantization fusing
+ deconv_test_params{ CASE_DECONV_FP32_2, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_4, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_5, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_6, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_7, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_8, 3, 5 },
+
+ deconv_test_params{ CASE_DECONV_FP16_1, 4, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_2, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_4, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_5, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_6, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_7, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_8, 3, 5 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_FP32_3D_1, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_2, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_3, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_4, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_5, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_6, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_7, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_8, 3, 5 },
+ // FIXME no quantize implementation for bs_fs_yx_bsv16_fsv16 format AND add_required_reorders pass completely ruins data types
+ // add_required_reorders pass tries to reorder everything to output type if no format exists, this ruins fp32 -> int8 quantize
+ // deconv_test_params{ CASE_DECONV_FP32_3D_9, 3, 5 },
+
+ deconv_test_params{ CASE_DECONV_FP16_3D_1, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_2, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_3, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_4, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_5, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_6, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_7, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_8, 3, 5 },
+ // deconv_test_params{ CASE_DECONV_FP16_3D_9, 3, 5 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_3D_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_8, 2, 5 },
+}), );
+
+class deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8 : public DeconvolutionFusingTest {};
+TEST_P(deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8, basic) {
+ auto p = GetParam();
+ create_topologies(
+ input_layout("input", get_input_layout(p)),
+ data("weights", get_mem(get_weights_layout(p))),
+ data("scale1_data", get_mem(get_per_channel_layout(p), 1.f / p.kernel.count())),
+ data("in1_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
+ data("in1_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
+ data("out1_lo", get_mem(get_single_element_layout(p), 0)),
+ data("out1_hi", get_mem(get_single_element_layout(p), 255)),
+ data("eltw_data", get_mem(layout(p.default_type, p.input_format, p.out_shape))),
+ data("scale2_data", get_mem(get_per_channel_layout(p), 1.f / p.kernel.count())),
+ data("in2_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
+ data("in2_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
+ data("out2_lo", get_mem(get_single_element_layout(p), -127)),
+ data("out2_hi", get_mem(get_single_element_layout(p), 127)),
+ deconvolution("deconv", "input", { "weights" }, p.groups, p.stride, p.pad),
+ scale("scale1", "deconv", "scale1_data"),
+ activation("actv1", "scale1", activation_func::relu),
+ quantize("quant1", "actv1", "in1_lo", "in1_hi", "out1_lo", "out1_hi", 256, data_types::u8),
+ eltwise("eltw", {"quant1", "eltw_data"}, eltwise_mode::sum, p.default_type),
+ scale("scale2", "eltw", "scale2_data"),
+ activation("actv2", "scale2", activation_func::relu),
+ quantize("quant2", "actv2", "in2_lo", "in2_hi", "out2_lo", "out2_hi", 255, data_types::i8),
+ reorder("out", "quant2", p.default_format, data_types::f32)
+ );
+
+ tolerance = 1.f;
+ execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8,
+ ::testing::ValuesIn(std::vector<deconv_test_params>{
+ // Some fusings disabled under deconvolution -> convolution optimization
+ // Quantize fusing disabled for fp16/fp32 for performance reasons
+ // deconv_test_params{ CASE_DECONV_FP32_1, 7, 9 }, FIXME Failure due to activation + quantization fusing
+ deconv_test_params{ CASE_DECONV_FP32_2, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_3, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_4, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_5, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_6, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_7, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_8, 6, 9 },
+
+ deconv_test_params{ CASE_DECONV_FP16_1, 7, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_2, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_4, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_5, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_6, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_7, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_8, 6, 9 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_1, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_2, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_4, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_5, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_6, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_7, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_8, 2, 9 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_1, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_2, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_4, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_5, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_6, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_7, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_8, 2, 9 },
+
+ // deconv_test_params{ CASE_DECONV_FP32_3D_1, 6, 9 }, FIXME Failure due to activation + quantization fusing
+ deconv_test_params{ CASE_DECONV_FP32_3D_2, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_3, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_4, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_5, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_6, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_7, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_8, 6, 9 },
+ // deconv_test_params{ CASE_DECONV_FP32_3D_9, 6, 9 },
+
+ deconv_test_params{ CASE_DECONV_FP16_3D_1, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_2, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_3, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_4, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_5, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_6, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_7, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_8, 6, 9 },
+ // deconv_test_params{ CASE_DECONV_FP16_3D_9, 6, 9 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_2, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_3, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_4, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_5, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_6, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_7, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_8, 2, 9 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_3D_1, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_2, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_3, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_4, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_5, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_6, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_7, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_8, 2, 9 },
+}), );
using VVVVF = std::vector<VVVF<T>>; // batch of 3d feature maps
template<typename T>
using VVVVVF = std::vector<VVVVF<T>>; // split of bfyx filters
+template<typename T>
+using VVVVVVF = std::vector<VVVVVF<T>>; // split of bfyx filters
template<typename T>
inline VF<T> flatten_4d(cldnn::format input_format, VVVVF<T> &data) {
return v;
}
+template<typename T>
+VVVVVVF<T> generate_random_6d(size_t a, size_t b, size_t c, size_t d, size_t e, size_t f, int min, int max, int k = 8) {
+ VVVVVVF<T> v(a);
+ for (size_t i = 0; i < a; ++i)
+ v[i] = generate_random_5d<T>(b, c, d, e, f, min, max, k);
+ return v;
+}
+
template <class T> void set_value(const cldnn::pointer<T>& ptr, uint32_t index, T value) { ptr[index] = value; }
template <class T> T get_value(const cldnn::pointer<T>& ptr, uint32_t index) { return ptr[index]; }
, do_sum_(false)
, max_data_reg_idx_(31), max_unroll_(12), compute_reg_step_(1)
, data_reg_base_idx_(0)
- , bf16_emu_(nullptr), eltwise_injector_(nullptr)
+ , bf16_emu_(nullptr)
+ , attr_(pd->attr())
+ , jit_eltwise_injectors_(0)
{
using namespace types;
using namespace Xbyak;
const auto dst_md = memory_desc_wrapper(pd->dst_pd());
dst_os_stride_ = dst_md.blk_off(0, 0, 0, 1);
-
+ bool do_depthwise_ = false;
auto &post_ops = pd->attr()->post_ops_;
- const int eltwise_ind = post_ops.find(primitive_kind::eltwise);
- do_eltwise_ = eltwise_ind != -1;
- if (do_eltwise_)
- eltwise_injector_ = new jit_uni_eltwise_injector_f32<avx512_common>(
- this, post_ops.entry_[eltwise_ind].eltwise, true,
- reserved_eltwise_gpr, reserved_eltwise_maskr);
+ for (int i = 0; i < post_ops.len_; i++) {
+ auto& post_op = post_ops.entry_[i];
+ if (post_op.is_eltwise()) {
+ jit_eltwise_injectors_.push_back(new jit_uni_eltwise_injector_f32<avx512_common>(this,
+ post_op.eltwise.alg,
+ post_op.eltwise.alpha,
+ post_op.eltwise.beta,
+ true, reserved_eltwise_gpr, reserved_eltwise_maskr));
+ } else if (post_op.is_depthwise()) {
+ do_depthwise_ = true;
+ }
+ }
do_sum_ = dst_data_type != data_type::f32
&& post_ops.contain(primitive_kind::sum, 0);
if (do_bias_)
vreg_bias = Zmm(data_reg_base_idx_++);
+ if (do_depthwise_)
+ vreg_dw = Zmm(data_reg_base_idx_++);
+
vlen_ = cpu_isa_traits<avx512_common>::vlen / sizeof(float);
if (!mayiuse(avx512_core_bf16)) {
mov(reg_len, ptr[reg_param + PARAM_OFF(spatial_length)]);
mov(reg_oc_iter, ptr[reg_param + PARAM_OFF(oc_work)]);
+ mov(reg_oc_offset, ptr[reg_param + PARAM_OFF(oc_offset)]);
+
if (do_sum_)
vbroadcastss(vreg_sum_scale, ptr[reg_param + PARAM_OFF(sum_scale)]);
#undef PARAM_OFF
vfmadd231ps(vreg_dst(idx), vreg_prev_dst(idx), vreg_sum_scale);
}
- if (do_eltwise_)
- eltwise_injector_->compute_vector(vreg_dst_idx(idx));
+ int eltwise_inj_idx = 0;
+ const auto& p = attr_->post_ops_;
+ for (int i = 0; i < p.len_; i++) {
+ auto& post_op = p.entry_[i];
+ if (post_op.is_eltwise()) {
+ jit_eltwise_injectors_[eltwise_inj_idx]->compute_vector(vreg_dst_idx(idx));
+ eltwise_inj_idx++;
+ } else if (post_op.is_depthwise()) {
+ mov(reg_dw, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
+ lea(reg_dw, ptr[reg_dw + reg_oc_offset]);
+
+ switch (post_op.depthwise.alg) {
+ case alg_kind::depthwise_scale_shift: {
+ vbroadcastss(vreg_dw, ptr[reg_dw]);
+ vmulps(vreg_dst(idx), vreg_dst(idx), vreg_dw);
+ mov(reg_dw, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+ lea(reg_dw, ptr[reg_dw + reg_oc_offset]);
+ vbroadcastss(vreg_dw, ptr[reg_dw]);
+ vaddps(vreg_dst(idx), vreg_dst(idx), vreg_dw);
+ break;
+ }
+ case alg_kind::depthwise_prelu: {
+ vpxord(vreg_dw, vreg_dw, vreg_dw);
+ vcmpps(kmask, vreg_dst(idx), vreg_dw, _cmp_lt_os);
+ vbroadcastss(vreg_dw, ptr[reg_dw]);
+ vmulps(vreg_dst(idx) | kmask, vreg_dst(idx), vreg_dw);
+ break;
+ }
+ default: assert(!"unsupported depthwise algorithm");
+ }
+ }
+ }
if (dst_data_type == data_type::bf16) {
// TODO: implement store by zmm registers for bf16
if (do_bias_)
add(reg_bias, sizeof(acc_data_t));
+ add(reg_oc_offset, sizeof(acc_data_t));
+
dec(reg_oc_iter);
jnz(oc_loop, T_NEAR); // oc_loop end
postamble();
- if (do_eltwise_)
- eltwise_injector_->prepare_table();
+ for (auto& inj : jit_eltwise_injectors_)
+ inj->prepare_table();
ker_ = getCode<decltype(ker_)>();
}
template <data_type_t dst_data_type>
void gemm_bf16_convolution_fwd_t<dst_data_type>::pp_ker_t::operator ()
- (dst_data_t *dst, const acc_data_t *acc, const acc_data_t *bias,
+ (dst_data_t *dst, const acc_data_t *acc, const acc_data_t *bias, size_t g_offset,
float sum_scale, size_t dst_stride_in_elements,
- size_t acc_stride_in_elements, size_t len, bool do_parallel)
+ size_t acc_stride_in_elements, size_t len, bool do_parallel, const post_ops_t& p)
{
assert(ker_);
if (len == 0)
ker_args args;
args.acc = acc + start_oc * acc_stride_in_elements;
args.dst = dst + start_oc * dst_stride_in_elements;
- args.bias = bias + start_oc;
+ args.bias = bias + start_oc + g_offset;
args.sum_scale = sum_scale;
args.dst_stride_in_bytes =
dst_stride_in_elements * sizeof(dst_data_t);
acc_stride_in_elements * sizeof(acc_data_t);
args.spatial_length = len;
args.oc_work = end_oc - start_oc;
+ args.oc_offset = (start_oc + g_offset) * sizeof(acc_data_t);
+
ker_(&args);
}
});
: nullptr;
const jit_gemm_conv_conf_t &jcp = this->pd()->jcp_;
+ auto src_offset = this->pd()->src_pd()->desc()->layout_desc.blocking.offset_padding;
+ auto dst_offset = this->pd()->dst_pd()->desc()->layout_desc.blocking.offset_padding;
float *bias = nullptr;
if (pd()->desc()->bias_desc.data_type == data_type::bf16) {
for (size_t iwork = start; iwork < end; ++iwork) {
int oh = ohb * jcp.oh_block;
int ow = owb * jcp.ow_block;
- const src_data_t *_src = src + (n * jcp.ngroups + g) * src_step;
+ const src_data_t *_src = src + (n * jcp.ngroups + g) * src_step + src_offset;
const wei_data_t *_weights = weights + g * weights_g_size;
- dst_data_t *_dst_im = dst + (n * jcp.ngroups + g) * dst_step;
+ dst_data_t *_dst_im = dst + (n * jcp.ngroups + g) * dst_step + dst_offset;
const int h_step = nstl::min(jcp.oh_block, jcp.oh - oh);
const int w_step = nstl::min(jcp.ow_block, jcp.ow - ow);
if (jcp.im2col_sz) {
if (this->pd()->is_postprocess_required()) {
size_t acc_str = LDC;
size_t dst_str = M;
- (*pp_ker_)(dst_local, _acc, bias + g * jcp.oc,
- sum_scale, dst_str, acc_str, m, jcp.nthr == 1);
+ (*pp_ker_)(dst_local, _acc, bias, g * jcp.oc,
+ sum_scale, dst_str, acc_str, m, jcp.nthr == 1, post_ops);
}
nd_iterator_step(g, jcp.ngroups, n, jcp.mb, od, jcp.od, ohb, nb_oh,
}
virtual bool is_gemm_conv_format() const {
- auto const &po = this->attr()->post_ops_;
- auto is_eltwise = [&](int idx)
- { return po.entry_[idx].is_eltwise(); };
- auto is_sum = [&](int idx) { return po.entry_[idx].is_sum(); };
-
- switch (po.len_) {
- case 0: return true; // no post_ops
- case 1: return is_eltwise(0) || is_sum(0); // sum OR eltwise
- case 2: return is_sum(0) && is_eltwise(1); // sum -> eltwise
- default: return false;
- }
+ auto const &p = this->attr()->post_ops_;
+ auto all_post_ops_supported = [&]() {
+ bool ok = true;
+
+ for (int i = 0; i < p.len_; i++) {
+ ok = ok && utils::one_of(p.entry_[i].kind, primitive_kind::sum, primitive_kind::eltwise, primitive_kind::depthwise);
+ }
+ return ok;
+ };
+
+ auto contain = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind) != -1; };
+ auto position = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind); };
+ auto count = [&](mkldnn::impl::primitive_kind_t kind) { return p.count(kind); };
+
+ return all_post_ops_supported() &&
+ count(primitive_kind::sum) <= 1 &&
+ IMPLICATION(contain(primitive_kind::sum), position(primitive_kind::sum) == 0);
+
+ return false;
}
};
~pp_ker_t() {
delete bf16_emu_;
- delete eltwise_injector_;
+ for (auto inj : jit_eltwise_injectors_)
+ delete inj;
+ jit_eltwise_injectors_.clear();
}
void operator()(dst_data_t *dst, const acc_data_t *acc,
- const acc_data_t *bias, float sum_scale,
- size_t dst_str, size_t acc_str, size_t len, bool do_parallel);
+ const acc_data_t *bias, size_t g_offset, float sum_scale,
+ size_t dst_str, size_t acc_str, size_t len, bool do_parallel, const post_ops_t& p);
size_t dst_os_stride_;
size_t acc_stride_in_bytes;
size_t spatial_length;
size_t oc_work;
+ size_t oc_offset;
};
enum {
Xbyak::Reg64 reg_dst_str = r13;
Xbyak::Reg64 reg_acc_str = r14;
+ using Vmm = typename cpu_isa_traits<avx512_common>::Vmm;
+ Xbyak::Reg64 reg_oc_offset = r10;
+ Xbyak::Reg64 reg_dw = r9;
+ Xbyak::Opmask kmask = k7;
+ post_ops_t post_ops_;
+
Xbyak::Reg64 reserved_eltwise_gpr = r10;
Xbyak::Opmask reserved_eltwise_maskr = k2;
- Xbyak::Zmm vreg_sum_scale, vreg_bias;
+ Xbyak::Zmm vreg_sum_scale, vreg_bias, vreg_dw;
Xbyak::Zmm bf16_emu_reserv_1 = Xbyak::Zmm(27);
Xbyak::Zmm bf16_emu_reserv_2 = Xbyak::Zmm(28);
int data_reg_base_idx_;
size_t vlen_;
bf16_emulation_t *bf16_emu_;
- jit_uni_eltwise_injector_f32<avx512_common> *eltwise_injector_;
+ const primitive_attr_t* attr_;
+ nstl::vector<jit_uni_eltwise_injector_f32<avx512_common>*> jit_eltwise_injectors_;
void generate();
int vreg_dst_idx(int iter) {
&& IMPLICATION(this->with_bias(), one_of(
desc()->bias_desc.data_type,
data_type::f32, data_type::bf16))
- && attr()->post_ops_.len_ <= 1
- && IMPLICATION(attr()->post_ops_.len_ == 1,
- attr()->post_ops_.entry_[0].is_eltwise())
+ && is_supported_post_ops()
&& dense_gemm_consitency_check(src_pd(), weights_pd(),
dst_pd());
if (!ok) return status::unimplemented;
return status::success;
}
+ virtual bool is_supported_post_ops() const {
+ const auto& p = this->attr()->post_ops_;
+
+ auto all_post_ops_supported = [&]() {
+ bool ok = true;
+
+ for (int i = 0; i < p.len_; i++) {
+ ok = ok && utils::one_of(p.entry_[i].kind, primitive_kind::eltwise, primitive_kind::depthwise);
+ }
+ return ok;
+ };
+
+ return all_post_ops_supported();
+ }
+
bool dst_is_acc_;
private:
, pp_kernel_(nullptr)
{
bool has_bias = pd()->with_bias(),
- has_eltwise = pd()->attr()->post_ops_.len_ == 1,
- has_scale = !pd()->attr()->output_scales_.has_default_values();
- postops_in_ip_ = false
- || !pd()->dst_is_acc_ || has_bias || has_eltwise || has_scale;
+ has_post_ops = pd()->attr()->post_ops_.len_ > 0,
+ has_scale = !pd()->attr()->output_scales_.has_default_values();
+ postops_in_ip_ = has_bias || has_post_ops || has_scale;
if (postops_in_ip_) {
if (mayiuse(avx512_core_bf16)) {
pp_kernel_ = new inner_product_utils::jit_pp_kernel_t<avx512_core_bf16, data_type::f32, dst_data_type>(apd);
if (post_op.is_eltwise()) {
if (post_op.eltwise.alg != alg_kind::eltwise_relu)
return status::unimplemented;
- } else if (post_op.is_depthwise()) {
+ } else if (post_op.is_depthwise() || post_op.is_quantization()) {
return status::unimplemented;
}
}
if (post_op.is_eltwise()) {
if (post_op.eltwise.alg != alg_kind::eltwise_relu)
return status::unimplemented;
- } else if (post_op.is_depthwise()) {
+ } else if (post_op.is_depthwise() || post_op.is_quantization()) {
return status::unimplemented;
}
}
if (!mayiuse(avx2)) {
for (int i = 0; i < p.len_; i++) {
auto &post_op = p.entry_[i];
- if (post_op.is_depthwise()) {
+ if (post_op.is_depthwise() || post_op.is_quantization()) {
return status::unimplemented;
}
}
};
#endif
auto vreg_accum = [=](int i_load, int i_ur) {
- int idx = i_ur * load_loop_blk + i_load;
+ int idx = i_ur + i_load * ur;
assert(idx < 31);
return Zmm(idx);
};
}
}
/* Eltwise post-op */
- if (jcp.with_eltwise)
- eltwise_injector_->compute_vector_range(0, ur * load_loop_blk);
+ int eltwise_inj_idx = 0;
+ int depthwise_inj_idx = 0;
+ const auto& p = attr_.post_ops_;
+
+ for (int i = 0; i < p.len_; i++) {
+ auto& post_op = p.entry_[i];
+ if (post_op.is_eltwise()) {
+ eltwise_injectors[eltwise_inj_idx]->compute_vector_range(0, ur * load_loop_blk);
+ eltwise_inj_idx++;
+ } else if (post_op.is_depthwise()) {
+ mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
+ mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+
+ add(reg_d_weights, reg_oc_off);
+ add(reg_d_bias, reg_oc_off);
+
+ for (int j = 0; j < load_loop_blk; ++j) {
+ int start_idx = vreg_accum(j, 0).getIdx();
+ int end_idx = start_idx + ur;
+
+ depthwise_injectors[depthwise_inj_idx]->compute_vector_range(
+ start_idx, end_idx, reg_d_weights, reg_d_bias);
+
+ add(reg_d_weights, jcp.oc_block * sizeof(float));
+ add(reg_d_bias, jcp.oc_block * sizeof(float));
+ }
+
+ depthwise_inj_idx++;
+ }
+ }
};
auto store_output = [=](bool output_is_aligned) {
void jit_avx512_core_bf16_1x1_conv_kernel::generate()
{
+ const auto& p = attr_.post_ops_;
+ for (int i = 0; i < p.len_; i++) {
+ auto& post_op = p.entry_[i];
+ if (post_op.is_eltwise()) {
+ eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<avx512_common>(
+ this,
+ post_op.eltwise.alg,
+ post_op.eltwise.alpha,
+ post_op.eltwise.beta
+ ));
+ } else if (post_op.is_depthwise()) {
+ depthwise_injectors.push_back(new jit_uni_depthwise_injector_f32<avx512_common>(
+ this,
+ post_op.depthwise.alg
+ ));
+ }
+ }
+
preamble();
mov(reg_bcast_data, ptr[param1 + GET_OFF(bcast_data)]);
mov(reg_output_stride, ptr[param1 + GET_OFF(output_stride)]);
}
+ mov(reg_oc_off, ptr[param1 + GET_OFF(oc_off)]);
auto load_loop_body = [=](int load_loop_blk) {
bcast_loop(load_loop_blk);
add(reg_load_data, load_loop_blk * jcp.load_loop_load_step);
assert(!"invalid prop_kind");
}
sub(reg_load_loop_work, load_loop_blk * jcp.load_loop_iter_step);
+ add(reg_oc_off, load_loop_blk * jcp.oc_block * jcp.typesize_out);
};
const int simd_w = 16;
postamble();
- if (jcp.with_eltwise)
- eltwise_injector_->prepare_table();
+ for (auto& inj : eltwise_injectors)
+ inj->prepare_table();
if (jcp.prop_kind == backward_weights) {
const uint16_t dst_prm_array[32] =
jit_1x1_conv_conf_t &jcp, const primitive_attr_t &attr) {
const auto &p = attr.post_ops_;
- auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
- auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(); };
+ auto all_post_ops_supported = [&]() {
+ bool ok = true;
- switch (p.len_) {
- case 0: return true; // no post_ops
- case 1: return is_eltwise(0) || is_sum(0); // sum OR eltwise
- case 2: return is_sum(0) && is_eltwise(1); // sum -> eltwise
- default: return false;
- }
+ for (int i = 0; i < p.len_; i++) {
+ ok = ok && utils::one_of(p.entry_[i].kind, primitive_kind::sum, primitive_kind::eltwise, primitive_kind::depthwise);
+ }
+ return ok;
+ };
+ auto contain = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind) != -1; };
+ auto position = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind); };
+ auto count = [&](mkldnn::impl::primitive_kind_t kind) { return p.count(kind); };
+
+ return all_post_ops_supported() &&
+ count(primitive_kind::sum) <= 1 &&
+ IMPLICATION(contain(primitive_kind::sum), position(primitive_kind::sum) == 0);
return false;
}
#include "jit_primitive_conf.hpp"
#include "jit_uni_eltwise.hpp"
#include "jit_avx512_core_bf16cvt.hpp"
+#include "jit_uni_depthwise.hpp"
+
//#define BF16_CONV_1x1_BWD_W_JIT_KER_USES_PERMW_TRANSPOSITION
const primitive_attr_t &attr) :
jit_generator(nullptr, ker_code_size),
jcp(ajcp), attr_(attr)
- , eltwise_injector_(nullptr)
, bf16_emu_(nullptr)
{
- if (jcp.with_eltwise)
- eltwise_injector_ = new jit_uni_eltwise_injector_f32<avx512_common>(
- this, jcp.eltwise);
-
if (!mayiuse(avx512_core_bf16))
bf16_emu_ = new bf16_emulation_t(this,
bf16_emu_reserv_1, bf16_emu_reserv_2,
}
~jit_avx512_core_bf16_1x1_conv_kernel() {
- delete eltwise_injector_;
+ for (auto inj : eltwise_injectors)
+ delete inj;
+ eltwise_injectors.clear();
+
+ for (auto inj : depthwise_injectors)
+ delete inj;
+ depthwise_injectors.clear();
delete bf16_emu_;
}
reg64_t reg_load_loop_work = rsi;
reg64_t reg_reduce_loop_work = r11;
reg64_t bcast_loop_iter = rdx;
- reg64_t reduce_loop_iter = abi_param1;
+ reg64_t reduce_loop_iter = r13;
reg64_t reg_reduce_pos_flag = rax;
reg64_t reg_output_stride = r13;
reg64_t reg_bias_data = r12;
Xbyak::Zmm zmm_bias = Xbyak::Zmm(31);
Xbyak::Label dst_prm_table;
+ reg64_t reg_oc_off = abi_param1;
+ reg64_t reg_d_weights = imm_addr64;
+ reg64_t reg_d_bias = r13;
- jit_uni_eltwise_injector_f32<avx512_common> *eltwise_injector_;
+ nstl::vector<jit_uni_eltwise_injector_f32<avx512_common>*> eltwise_injectors;
+ nstl::vector<jit_uni_depthwise_injector_f32<avx512_common>*> depthwise_injectors;
int bcast_loop_work_offt = 0;
#ifdef BF16_CONV_1x1_BWD_W_JIT_KER_USES_PERMW_TRANSPOSITION
} else
p.bcast_data = src + data_blk_off(src_d, n, _icb, ih, iw);
+ p.oc_off = _ocb * jcp.oc_block * sizeof(dst_data_t);
kernel_->jit_ker(&p);
};
}
}
- if (jcp.with_eltwise) {
- if (ur_w == jcp.ur_w) {
- eltwise_injector_->compute_vector_range(0,
- jcp.nb_oc_blocking * jcp.ur_w);
- } else {
- for (int k = 0; k < jcp.nb_oc_blocking; k++)
- eltwise_injector_->compute_vector_range(k * jcp.ur_w,
- k * jcp.ur_w + ur_w);
+ int eltwise_inj_idx = 0;
+ int depthwise_inj_idx = 0;
+ const auto &p = attr_.post_ops_;
+
+ for (int i = 0; i < p.len_; i++) {
+ auto& post_op = p.entry_[i];
+ if (post_op.is_eltwise()) {
+ if (ur_w == jcp.ur_w) {
+ eltwise_injectors[eltwise_inj_idx]->compute_vector_range(0,
+ jcp.nb_oc_blocking * jcp.ur_w);
+ } else {
+ for (int k = 0; k < jcp.nb_oc_blocking; k++)
+ eltwise_injectors[eltwise_inj_idx]->compute_vector_range(k * jcp.ur_w,
+ k * jcp.ur_w + ur_w);
+ }
+
+ eltwise_inj_idx++;
+ } else if (post_op.is_depthwise()) {
+ mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
+ mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+
+ add(reg_d_weights, ptr[this->param1 + GET_OFF(oc_off)]);
+ add(reg_d_bias, ptr[this->param1 + GET_OFF(oc_off)]);
+
+ for (int k = 0; k < jcp.nb_oc_blocking; k++) {
+ depthwise_injectors[depthwise_inj_idx]->compute_vector_range(
+ k*jcp.ur_w, k*jcp.ur_w + ur_w, reg_d_weights, reg_d_bias);
+
+ add(reg_d_weights, jcp.oc_block * sizeof(float));
+ add(reg_d_bias, jcp.oc_block * sizeof(float));
+ }
+
+ depthwise_inj_idx++;
}
}
void jit_avx512_core_bf16_fwd_kernel::generate()
{
+ const auto &p = attr_.post_ops_;
+ for (int i = 0; i < p.len_; i++) {
+ auto &post_op = p.entry_[i];
+ if (post_op.is_eltwise()) {
+ eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<avx512_common>(
+ this,
+ post_op.eltwise.alg,
+ post_op.eltwise.alpha,
+ post_op.eltwise.beta
+ ));
+ } else if (post_op.is_depthwise()) {
+ depthwise_injectors.push_back(new jit_uni_depthwise_injector_f32<avx512_common>(
+ this,
+ post_op.depthwise.alg
+ ));
+ }
+ }
+
int iw = jcp.iw;
int ow = jcp.ow;
int ow_block = jcp.ow_block;
}
postamble();
- if (jcp.with_eltwise)
- eltwise_injector_->prepare_table();
+ for (auto& inj : eltwise_injectors)
+ inj->prepare_table();
}
bool jit_avx512_core_bf16_fwd_kernel::post_ops_ok(
jit_conv_conf_t &jcp, const primitive_attr_t &attr) {
const auto &p = attr.post_ops_;
- auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
- auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(); };
+ auto all_post_ops_supported = [&]() {
+ bool ok = true;
- switch (p.len_) {
- case 0: return true; // no post_ops
- case 1: return is_eltwise(0) || is_sum(0); // sum OR eltwise
- case 2: return is_sum(0) && is_eltwise(1); // sum -> eltwise
- default: return false;
- }
+ for (int i = 0; i < p.len_; i++) {
+ ok = ok && utils::one_of(p.entry_[i].kind, primitive_kind::sum, primitive_kind::eltwise, primitive_kind::depthwise);
+ }
+ return ok;
+ };
+ auto contain = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind) != -1; };
+ auto position = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind); };
+ auto count = [&](mkldnn::impl::primitive_kind_t kind) { return p.count(kind); };
+
+ return all_post_ops_supported() &&
+ count(primitive_kind::sum) <= 1 &&
+ IMPLICATION(contain(primitive_kind::sum), position(primitive_kind::sum) == 0);
return false;
}
#include "jit_primitive_conf.hpp"
#include "jit_uni_eltwise.hpp"
#include "jit_avx512_core_bf16cvt.hpp"
+#include "jit_uni_depthwise.hpp"
//#define BF16_CONV_BWD_W_JIT_KER_USES_PERMW_TRANSPOSITION
//#define BF16_CONV_BWD_W_DOES_NOT_USE_BARRIERS
jit_generator(nullptr, ker_code_size),
jcp(ajcp),
attr_(attr),
- eltwise_injector_(nullptr),
bf16_emu_(nullptr)
{
- if (jcp.with_eltwise)
- eltwise_injector_ = new jit_uni_eltwise_injector_f32<avx512_common>(
- this, jcp.eltwise);
if (!mayiuse(avx512_core_bf16))
bf16_emu_ = new bf16_emulation_t(this,
bf16_emu_reserv_1, bf16_emu_reserv_2,
~jit_avx512_core_bf16_fwd_kernel() {
delete bf16_emu_;
- delete eltwise_injector_;
+ for (auto inj : eltwise_injectors)
+ delete inj;
+ eltwise_injectors.clear();
+
+ for (auto inj : depthwise_injectors)
+ delete inj;
+ depthwise_injectors.clear();
}
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_bf16_fwd_kernel)
Xbyak::Zmm bf16_emu_reserv_5 = Xbyak::Zmm(29);
Xbyak::Zmm bf16_emu_reserv_6 = Xbyak::Zmm(30);
- jit_uni_eltwise_injector_f32<avx512_common> *eltwise_injector_;
+ reg64_t reg_d_weights = imm_addr64;
+ reg64_t reg_d_bias = reg_kj;
+
+ nstl::vector<jit_uni_eltwise_injector_f32<avx512_common>*> eltwise_injectors;
+ nstl::vector<jit_uni_depthwise_injector_f32<avx512_common>*> depthwise_injectors;
+
bf16_emulation_t *bf16_emu_;
inline void prepare_output(int ur_w);
auto src_w = src + src_d.blk_off(n, g_icb, iw_s);
auto wht_w = weights + wht_blk_off(weights_d, g, ocb);
+ par_conv.oc_off = g_oc * sizeof(dst_data_t);
par_conv.src = src_w;
par_conv.dst = dst_w;
par_conv.filt = wht_w;
auto aux_src = src_w + i_t_overflow * dilate_h * src_h_stride;
auto aux_wht = wht_w + i_t_overflow * wht_h_stride;
+ par_conv.oc_off = g_oc * sizeof(dst_data_t);
par_conv.src = aux_src;
par_conv.dst = dst_w;
par_conv.filt = aux_wht;
auto aux_src = src_w + i_t_overflow * dilate_h * src_h_stride;
auto aux_wht = wht_w + i_t_overflow * wht_h_stride;
+ par_conv.oc_off = g_oc * sizeof(dst_data_t);
par_conv.src = aux_src;
par_conv.dst = dst_w;
par_conv.filt = aux_wht;
L(iter_exit_label);
}
-void jit_avx512_dw_conv_fwd_kernel_bf16::apply_activation(
+// Emits all attached post-ops (eltwise/depthwise), in post-op order, over the
+// ur_w * ur_ch_blocks accumulator registers. Renamed from apply_activation
+// since it now handles more than a single eltwise.
+void jit_avx512_dw_conv_fwd_kernel_bf16::apply_postprocess(
        int ur_ch_blocks, int ur_w) {
-    if (this->jcp.with_eltwise) {
-        eltwise_injector_->compute_vector_range(
-                acc_idx_start, ur_w * ur_ch_blocks + acc_idx_start);
+    int eltwise_inj_idx = 0;
+    int depthwise_inj_idx = 0;
+    const auto& p = attr_.post_ops_;
+
+    for (int i = 0; i < p.len_; i++) {
+        auto& post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            // Eltwise is elementwise: one call covers the whole acc range.
+            int start_idx = get_acc_reg(0).getIdx();
+            int end_idx = get_acc_reg(ur_w * ur_ch_blocks).getIdx();
+
+            eltwise_injectors[eltwise_inj_idx]->compute_vector_range(start_idx, end_idx);
+            eltwise_inj_idx++;
+        } else if (post_op.is_depthwise()) {
+            // Depthwise needs per-channel weights/bias: base pointers come
+            // from the post-op data, offset by the current output-channel
+            // offset passed through the call args (oc_off).
+            mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
+            mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+
+            add(reg_d_weights, ptr[this->param1 + GET_OFF(oc_off)]);
+            add(reg_d_bias, ptr[this->param1 + GET_OFF(oc_off)]);
+
+            for (int ch = 0; ch < ur_ch_blocks; ch++) {
+                int start_idx = get_acc_reg(ur_w * ch).getIdx();
+                int end_idx = get_acc_reg(ur_w * ch + ur_w).getIdx();
+
+                depthwise_injectors[depthwise_inj_idx]->compute_vector_range(
+                        start_idx, end_idx, reg_d_weights, reg_d_bias);
+
+                // Advance to the next channel block's weights/bias.
+                add(reg_d_weights, jcp.ch_block * sizeof(float));
+                add(reg_d_bias, jcp.ch_block * sizeof(float));
+            }
+
+            depthwise_inj_idx++;
+        }
    }
}
load_src(ur_ch_blocks, ur_w);
apply_filter_unrolled(ur_ch_blocks, ur_w);
- apply_activation(ur_ch_blocks, ur_w);
+ apply_postprocess(ur_ch_blocks, ur_w);
store_dst(ur_ch_blocks, ur_w);
add(reg_input, jcp.typesize_in * ur_w * jcp.ch_block * jcp.stride_w);
load_src(ur_ch_blocks, ur_w);
apply_filter(ur_ch_blocks, ur_w);
- apply_activation(ur_ch_blocks, ur_w);
+ apply_postprocess(ur_ch_blocks, ur_w);
store_dst(ur_ch_blocks, ur_w);
add(reg_input, jcp.typesize_in * ur_w * jcp.ch_block * jcp.stride_w);
}
void jit_avx512_dw_conv_fwd_kernel_bf16::generate() {
+    // Instantiate injectors in post-op order before emitting any code;
+    // apply_postprocess indexes into these vectors during emission.
+    const auto& p = attr_.post_ops_;
+    for (int i = 0; i < p.len_; i++) {
+        auto& post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<avx512_common>(
+                    this,
+                    post_op.eltwise.alg,
+                    post_op.eltwise.alpha,
+                    post_op.eltwise.beta
+            ));
+        } else if (post_op.is_depthwise()) {
+            depthwise_injectors.push_back(new jit_uni_depthwise_injector_f32<avx512_common>(
+                    this,
+                    post_op.depthwise.alg
+            ));
+        }
+    }
+
    this->preamble();
    mov(reg_input, ptr[this->param1 + GET_OFF(src)]);
    this->postamble();
-    if (jcp.with_eltwise)
-        eltwise_injector_->prepare_table();
+    // Eltwise lookup tables are emitted after the kernel body.
+    for (auto& inj : eltwise_injectors)
+        inj->prepare_table();
}
inline void jit_avx512_dw_conv_bwd_data_kernel_bf16::load_ddst(
#include "jit_generator.hpp"
#include "jit_primitive_conf.hpp"
#include "jit_uni_eltwise.hpp"
-
+#include "jit_uni_depthwise.hpp"
#include "jit_avx512_core_bf16cvt.hpp"
namespace mkldnn {
struct jit_avx512_dw_conv_fwd_kernel_bf16 : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_dw_conv_fwd_kernel_bf16)
- jit_avx512_dw_conv_fwd_kernel_bf16(jit_conv_conf_t ajcp, const primitive_attr_t&)
- : jcp(ajcp), eltwise_injector_(nullptr), bf16_emu_(nullptr) {
- if (jcp.with_eltwise)
- eltwise_injector_ = new jit_uni_eltwise_injector_f32<avx512_common>(
- this, jcp.eltwise);
+ jit_avx512_dw_conv_fwd_kernel_bf16(jit_conv_conf_t ajcp, const primitive_attr_t& attr)
+ : jcp(ajcp), attr_(attr), bf16_emu_(nullptr) {
if (!mayiuse(avx512_core_bf16))
bf16_emu_ = new bf16_emulation_t(this, bf16_emu_reserv_1,
bf16_emu_reserv_2, bf16_emu_reserv_3, bf16_emu_reserv_4,
}
~jit_avx512_dw_conv_fwd_kernel_bf16() {
- delete eltwise_injector_;
+ for (auto inj : eltwise_injectors)
+ delete inj;
+ eltwise_injectors.clear();
+
+ for (auto inj : depthwise_injectors)
+ delete inj;
+ depthwise_injectors.clear();
+
delete bf16_emu_;
}
jit_conv_conf_t jcp;
+ const primitive_attr_t& attr_;
void (*jit_ker)(jit_conv_call_s *);
private:
reg64_t reg_ur_w = rbp;
reg64_t reg_ch_blocks = aux1_reg_input;
reg64_t imm_addr64 = aux1_reg_input;
+ reg64_t reg_d_weights = imm_addr64;
+ reg64_t reg_d_bias = iter_kh;
Xbyak::Zmm zmm_ker_reg = Xbyak::Zmm(0);
Xbyak::Zmm zmm_src_reg = Xbyak::Zmm(1);
inline void load_src(int ur_ch_blocks, int ur_w);
inline void apply_filter(int ur_ch_blocks, int ur_w);
inline void apply_filter_unrolled(int ur_ch_blocks, int ur_w);
- inline void apply_activation(int ur_ch_blocks, int ur_w);
+ inline void apply_postprocess(int ur_ch_blocks, int ur_w);
inline void store_dst(int ur_ch_blocks, int ur_w);
inline void loop_ow(int ur_ch_blocks);
- jit_uni_eltwise_injector_f32<avx512_common> *eltwise_injector_;
+ nstl::vector<jit_uni_eltwise_injector_f32<avx512_common>*> eltwise_injectors;
+ nstl::vector<jit_uni_depthwise_injector_f32<avx512_common>*> depthwise_injectors;
bf16_emulation_t *bf16_emu_;
bool jit_uni_dw_conv_fwd_kernel<isa, kernel_dt>::post_ops_ok(
        jit_conv_conf_t &jcp, const primitive_attr_t &attr, bool is_bf16) {
    const auto &p = attr.post_ops_;
+    // Unified check for the f32 and bf16 paths: any chain of
+    // sum/eltwise/depthwise, at most one sum, sum (if present) first.
+    auto all_post_ops_supported = [&]() {
+        bool ok = true;
-    if (is_bf16) {
-        auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
-        auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(); };
-
-        switch (p.len_) {
-        case 0: return true; // no post_ops
-        case 1: return is_eltwise(0) || is_sum(0); // sum OR eltwise
-        case 2: return is_sum(0) && is_eltwise(1); // sum -> eltwise
-        default: return false;
+        for (int i = 0; i < p.len_; i++) {
+            ok = ok && utils::one_of(p.entry_[i].kind, primitive_kind::sum, primitive_kind::eltwise, primitive_kind::depthwise);
        }
-    } else {
-        auto all_post_ops_supported = [&]() {
-            bool ok = true;
-
-            for (int i = 0; i < p.len_; i++) {
-                ok = ok && utils::one_of(p.entry_[i].kind, primitive_kind::sum, primitive_kind::eltwise, primitive_kind::depthwise, primitive_kind::quantization);
-            }
-            return ok;
-        };
-        auto contain = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind) != -1; };
-        auto position = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind); };
-        auto count = [&](mkldnn::impl::primitive_kind_t kind) { return p.count(kind); };
-
-        return all_post_ops_supported() &&
-               count(primitive_kind::sum) <= 1 &&
-               IMPLICATION(contain(primitive_kind::sum), position(primitive_kind::sum) == 0);
-    }
+        return ok;
+    };
+    auto contain = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind) != -1; };
+    auto position = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind); };
+    auto count = [&](mkldnn::impl::primitive_kind_t kind) { return p.count(kind); };
+
+    // NOTE(review): the removed f32 branch also accepted
+    // primitive_kind::quantization; the unified allowed set drops it —
+    // confirm this narrowing is intentional for f32 depthwise conv.
+    return all_post_ops_supported() &&
+           count(primitive_kind::sum) <= 1 &&
+           IMPLICATION(contain(primitive_kind::sum), position(primitive_kind::sum) == 0);
+    // NOTE(review): unreachable after the return above; candidate for removal.
    return false;
}
return s_val >= 0 ? s_val : s_val*w_val;
}
+// Reinterprets a float's bit pattern as two 16-bit halves.
+union float_raw {
+    float f;
+    unsigned short i[2];
+};
+
+// Expands bf16 -> f32: bf16 occupies the high 16 bits of an f32 whose low
+// mantissa bits are zero.
+// NOTE(review): treating i[1] as the high half assumes a little-endian host.
+static float bf16tof32(mkldnn_bfloat16_t bf16) {
+    union float_raw t = { 0 };
+    t.i[1] = bf16;
+    t.i[0] = 0;
+    return t.f;
+}
+
+// Narrows f32 -> bf16 by keeping only the high 16 bits, i.e. truncation of
+// the mantissa (no round-to-nearest-even).
+static mkldnn_bfloat16_t f32tobf16(float f32) {
+    union float_raw t = { 0 };
+    t.f = f32;
+    return t.i[1];
+}
+
+// Depthwise scale_shift reference in bf16: d = s * w + b, computed in f32
+// and truncated back to bf16.
+inline mkldnn_bfloat16_t bf16_scale_shift_fwd(mkldnn_bfloat16_t s_val, mkldnn_bfloat16_t w_val, mkldnn_bfloat16_t b_val) {
+    return f32tobf16(bf16tof32(s_val) * bf16tof32(w_val) + bf16tof32(b_val));
+}
+
+// Depthwise prelu reference in bf16: d = (s >= 0) ? s : s * w.
+// Fix: compare the decoded f32 value rather than the raw bf16 bits. If
+// mkldnn_bfloat16_t is an unsigned 16-bit integer, 's_val >= 0' is always
+// true (negative values just have the sign bit set in the raw pattern), so
+// the negative-slope branch would never be taken.
+inline mkldnn_bfloat16_t bf16_prelu_fwd(mkldnn_bfloat16_t s_val, mkldnn_bfloat16_t w_val) {
+    const float s = bf16tof32(s_val);
+    return s >= 0 ? s_val : f32tobf16(s * bf16tof32(w_val));
+}
+
ref_depthwise_scalar_fwd_t::ref_depthwise_scalar_fwd_t(const alg_kind_t alg_)
: alg(alg_) {
using namespace alg_kind;
data_t b_val = bias ? bias[bias_d.off(wei_idx)] : (data_t)0;
data_t &d_val = dst[data_off];
- switch (alg_kind) {
- case depthwise_scale_shift: d_val = scale_shift_fwd(s_val, w_val, b_val); break;
- case depthwise_prelu: d_val = prelu_fwd(s_val, w_val); break;
- default: assert(!"unknown depthwise alg_kind");
+ if (data_type == mkldnn_bf16) {
+ switch (alg_kind) {
+ case depthwise_scale_shift: d_val = bf16_scale_shift_fwd(s_val, w_val, b_val); break;
+ case depthwise_prelu: d_val = bf16_prelu_fwd(s_val, w_val); break;
+ default: assert(!"unknown depthwise alg_kind");
+ }
+ } else {
+ switch (alg_kind) {
+ case depthwise_scale_shift: d_val = scale_shift_fwd(s_val, w_val, b_val); break;
+ case depthwise_prelu: d_val = prelu_fwd(s_val, w_val); break;
+ default: assert(!"unknown depthwise alg_kind");
+ }
}
});
}
template struct ref_depthwise_fwd_t<data_type::f32>;
+template struct ref_depthwise_fwd_t<data_type::bf16>;
}
}
test_convolution_forward_x8s8fp.cpp
test_convolution_forward_x8s8fp_3d.cpp
test_convolution_eltwise_forward_f32.cpp
+ test_convolution_eltwise_forward_bf16.cpp
test_convolution_eltwise_forward_x8s8f32s32.cpp
test_convolution_backward_data_f32.cpp
test_convolution_backward_data_s16s16s32.cpp
test_rnn_forward.cpp
test_roi_pooling_forward.cpp
test_convolution_depthwise_forward_f32.cpp
+ test_convolution_depthwise_forward_bf16f32.cpp
test_convolution_depthwise_forward_x8s8f32s32.cpp
test_convolution_dw_conv_f32.cpp
test_convolution_dw_conv_u8s8s32.cpp
--- /dev/null
+/*******************************************************************************
+* Copyright 2020 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+#include "mkldnn.hpp"
+#include "test_convolution_depthwise_forward_common.hpp"
+
+namespace mkldnn {
+
+// Template args look like <src, wei, acc, dst>: bf16 src/weights, f32
+// accumulation/destination — confirm against
+// test_convolution_depthwise_forward_common.hpp.
+using convolution_test = convolution_depthwise_test<mkldnn_bfloat16_t, mkldnn_bfloat16_t, float, float>;
+
+// Body intentionally empty: all work happens in the fixture's SetUp.
+TEST_P(convolution_test, TestConvolution)
+{
+}
+
+#define EXPAND_FORMATS(src, weights, bias, dst) \
+    { mkldnn::memory::format::src, mkldnn::memory::format::weights, \
+    mkldnn::memory::format::bias, mkldnn::memory::format::dst }
+
+#define FMT_WEIGHTS_BLOCKED16 OIhw8i16o2i
+#define FMT_WEIGHTS_BLOCKED16_DW Goihw16g
+
+#define ENGINE mkldnn::engine::kind::cpu
+#define ALGORITHM mkldnn::convolution_direct
+
+#define CONCAT_WITH_UNDERSCORE_(a,b) a ## _ ## b
+#define CONCAT_WITH_UNDERSCORE(a,b) CONCAT_WITH_UNDERSCORE_(a,b)
+
+// NOTE(review): uses INSTANTIATE_TEST_CASE_P (deprecated in newer gtest),
+// while the bf16 eltwise test file uses INSTANTIATE_TEST_SUITE_P — consider
+// aligning the two for consistency with the project's gtest version.
+#define INST_TEST_CASE_(str, ...) INSTANTIATE_TEST_CASE_P( \
+    str, convolution_test, ::testing::Values(__VA_ARGS__))
+
+#define INST_TEST_CASE(str, ...) INST_TEST_CASE_( \
+    CONCAT_WITH_UNDERSCORE(CONCAT_WITH_UNDERSCORE(Convolution, \
+    str), depthwise), __VA_ARGS__)
+
+#define EXPAND_ARGS(args) args
+
+#define PARAMS(...) \
+ EXPAND_ARGS(PARAMS_CONV(depthwise_scale_shift, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(depthwise_prelu, __VA_ARGS__))
+
+#define PARAMS_CONV(alg, src, weights, bias, dst, ...) \
+ test_convolution_depthwise_params_t {alg, ENGINE, ALGORITHM, \
+ EXPAND_FORMATS(src, weights, bias, dst), /* empty attributes */ {}, \
+ {__VA_ARGS__} }
+
+ INST_TEST_CASE(SimpleSmall,
+ PARAMS(nchw, oihw, x, nchw, 2, 1, 32, 13, 13, 48, 11, 11, 3, 3, 0, 0, 1, 1),
+ PARAMS(nchw, oihw, x, nchw, 2, 1, 16, 13, 13, 48, 13, 13, 1, 1, 0, 0, 1, 1),
+ PARAMS(nchw, goihw, x, nchw, 2, 64, 64, 16, 16, 64, 16, 16, 3, 3, 0, 0, 1, 1),
+ PARAMS(nchw, goihw, x, nchw, 2, 2, 32, 9, 9, 32, 9, 9, 1, 1, 0, 0, 1, 1)
+ );
+
+ INST_TEST_CASE(SimpleSmall_Blocked16,
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16, x, nChw16c,
+ 2, 1, 32, 13, 13, 48, 11, 11, 3, 3, 0, 0, 1, 1),
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16, x, nChw16c,
+ 2, 1, 16, 13, 13, 48, 13, 13, 1, 1, 0, 0, 1, 1),
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16_DW, x, nChw16c,
+ 2, 64, 64, 16, 16, 64, 16, 16, 3, 3, 0, 0, 1, 1),
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16_DW, x, nChw16c,
+ 2, 32, 32, 9, 9, 32, 9, 9, 1, 1, 0, 0, 1, 1),
+
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16, x, nChw16c,
+ 2, 1, 8, 13, 13, 16, 13, 13, 1, 1, 0, 0, 1, 1),
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16, x, nChw16c,
+ 2, 1, 8, 3, 3, 16, 3, 3, 1, 1, 0, 0, 1, 1),
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16, x, nChw16c,
+ 2, 1, 16, 13, 13, 48, 13, 13, 1, 1, 0, 0, 1, 1),
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16, x, nChw16c,
+ 2, 1, 16, 13, 13, 48, 13, 13, 3, 3, 0, 0, 1, 1)
+ );
+}
return div_up(a, b) * b;
}
+// Expands bf16 -> f32 for the scalar reference computation: bf16 bits go to
+// the high half, low mantissa bits are zeroed.
+// NOTE(review): i[1]-as-high-half assumes a little-endian host.
+static float bf16tof32(mkldnn_bfloat16_t bf16) {
+    union float_raw t = { 0 };
+    t.i[1] = bf16;
+    t.i[0] = 0;
+    return t.f;
+}
+
template <typename data_t_src, typename data_t_wei,
typename data_t_acc, typename data_t_dst>
void compute_ref_conv_depthwise_fwd(const test_convolution_sizes_t &c,
const memory &depthwise_weights, const memory &depthwise_bias)
{
data_t_src *src_data = (data_t_src *)src.get_data_handle();
+ memory::data_type data_type_src = data_traits<data_t_src>::data_type;
data_t_wei *weights_data = (data_t_wei *)weights.get_data_handle();
data_t_dst *bias_data
= (data_t_dst *)(w_bias ? bias.get_data_handle() : nullptr);
+ oc * padded_ic_w / c.ng * c.kh * c.kw
+ ic * c.kh * c.kw + kh * c.kw + kw;
- dst_data[didx] += src_data[map_index(src_d, iidx)]
+ if (data_type_src == mkldnn_bf16) {
+ dst_data[didx] += bf16tof32(src_data[map_index(src_d, iidx)])
+ * bf16tof32(weights_data[map_index(weights_d, widx)]);
+ } else {
+ dst_data[didx] += src_data[map_index(src_d, iidx)]
* weights_data[map_index(weights_d, widx)];
+ }
}
switch (depthwise_alg) {
--- /dev/null
+/*******************************************************************************
+* Copyright 2020 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+#include "math_utils.hpp"
+#include "mkldnn.hpp"
+#include "test_convolution_eltwise_forward_common.hpp"
+
+namespace mkldnn {
+
+// Template args look like <src, wei, acc, dst>: bf16 src/weights, f32
+// accumulation/destination — confirm against
+// test_convolution_eltwise_forward_common.hpp.
+using convolution_test = convolution_eltwise_test<mkldnn_bfloat16_t, mkldnn_bfloat16_t, float, float>;
+
+// Body intentionally empty: all work happens in the fixture's SetUp.
+TEST_P(convolution_test, TestConvolutionEltwise)
+{
+}
+
+#define EXPAND_FORMATS(src, weights, bias, dst) \
+    { mkldnn::memory::format::src, mkldnn::memory::format::weights, \
+    mkldnn::memory::format::bias, mkldnn::memory::format::dst }
+
+#define CONCAT_WITH_UNDERSCORE_(a,b) a ## _ ## b
+#define CONCAT_WITH_UNDERSCORE(a,b) CONCAT_WITH_UNDERSCORE_(a,b)
+
+// NOTE(review): uses INSTANTIATE_TEST_SUITE_P while the bf16 depthwise test
+// file uses the deprecated INSTANTIATE_TEST_CASE_P — consider aligning.
+#define INST_TEST_CASE_(str, ...) INSTANTIATE_TEST_SUITE_P( \
+    str, convolution_test, ::testing::Values(__VA_ARGS__))
+
+#define INST_TEST_CASE(str, ...) INST_TEST_CASE_( \
+    CONCAT_WITH_UNDERSCORE(CONCAT_WITH_UNDERSCORE(Convolution, \
+    str), eltwise), __VA_ARGS__)
+
+#define EXPAND_ARGS(args) args
+
+#define PARAMS(...) \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_relu, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_tanh, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_elu, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_square, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_abs, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_sqrt, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_linear, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_bounded_relu, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_soft_relu, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_logistic, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_exp, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_swish, __VA_ARGS__))
+
+#define ELTWISE_ALPHA 0.5f
+#define ELTWISE_BETA 1.5f
+
+#define PARAMS_CONV(alg, src, weights, bias, dst, ...) \
+ test_convolution_eltwise_params_t {alg, mkldnn::engine::kind::cpu, \
+ mkldnn::convolution_direct, ELTWISE_ALPHA, ELTWISE_BETA, \
+ EXPAND_FORMATS(src, weights, bias, dst), /* empty attributes */ {}, \
+ {__VA_ARGS__} }
+
+ INST_TEST_CASE(SimpleSmall,
+ PARAMS(nchw, oihw, x, nchw, 2, 1, 32, 13, 13, 48, 11, 11, 3, 3, 0, 0, 1, 1),
+ PARAMS(nchw, oihw, x, nchw, 2, 1, 16, 13, 13, 48, 13, 13, 1, 1, 0, 0, 1, 1),
+ PARAMS(nchw, goihw, x, nchw, 2, 64, 64, 16, 16, 64, 16, 16, 3, 3, 0, 0, 1, 1),
+ PARAMS(nchw, goihw, x, nchw, 2, 32, 32, 9, 9, 32, 9, 9, 1, 1, 0, 0, 1, 1)
+ );
+
+ INST_TEST_CASE(SimpleSmall_Blocked16,
+ PARAMS(nChw16c, Goihw16g, x, nChw16c, 1, 48, 48, 20, 20, 48, 20, 20, 3, 3, 1, 1, 1, 1),
+ PARAMS(nChw16c, OIhw8i16o2i, x, nChw16c, 1, 1, 48, 20, 20, 48, 20, 20, 1, 1, 0, 0, 1, 1),
+ PARAMS(nChw16c, OIhw8i16o2i, x, nChw16c, 1, 1, 48, 20, 20, 48, 20, 20, 3, 3, 0, 0, 1, 1),
+ PARAMS(nChw16c, OIhw8i16o2i, x, nChw16c, 2, 1, 32, 32, 32, 32, 32, 32, 3, 3, 0, 0, 1, 1)
+ );
+
+ INST_TEST_CASE(SimpleSmall_Blocked16_Tail,
+ PARAMS(nChw16c, Goihw16g, x, nChw16c, 1, 47, 47, 20, 20, 47, 20, 20, 3, 3, 1, 1, 1, 1),
+ PARAMS(nChw16c, OIhw8i16o2i, x, nChw16c, 1, 1, 47, 20, 20, 47, 20, 20, 1, 1, 0, 0, 1, 1),
+ PARAMS(nChw16c, OIhw8i16o2i, x, nChw16c, 2, 1, 32, 32, 32, 32, 32, 32, 3, 3, 0, 0, 1, 1)
+ );
+}
namespace mkldnn {
+// Expands bf16 -> f32 for the scalar reference computation: bf16 bits go to
+// the high half, low mantissa bits are zeroed.
+// NOTE(review): i[1]-as-high-half assumes a little-endian host.
+static float bf16tof32(mkldnn_bfloat16_t bf16) {
+    union float_raw t = { 0 };
+    t.i[1] = bf16;
+    t.i[0] = 0;
+    return t.f;
+}
+
template <typename data_t_src, typename data_t_wei,
typename data_t_acc, typename data_t_dst>
void compute_ref_conv_eltwise_fwd(const test_convolution_sizes_t &c,
float elt_alpha, float elt_beta)
{
data_t_src *src_data = (data_t_src *)src.get_data_handle();
+ memory::data_type data_type_src = data_traits<data_t_src>::data_type;
data_t_wei *weights_data = (data_t_wei *)weights.get_data_handle();
data_t_dst *bias_data
= (data_t_dst *)(w_bias ? bias.get_data_handle() : nullptr);
+ oc * padded_ic_w / c.ng * c.kh * c.kw
+ ic * c.kh * c.kw + kh * c.kw + kw;
- dst_data[didx] += src_data[map_index(src_d, iidx)]
+ if (data_type_src == mkldnn_bf16) {
+ dst_data[didx] += bf16tof32(src_data[map_index(src_d, iidx)])
+ * bf16tof32(weights_data[map_index(weights_d, widx)]);
+ } else {
+ dst_data[didx] += src_data[map_index(src_d, iidx)]
* weights_data[map_index(weights_d, widx)];
+ }
}
auto &d = dst_data[didx];
case eltwise_soft_relu: d = soft_relu_fwd(d); break;
case eltwise_logistic: d = logistic_fwd(d); break;
case eltwise_exp: d = exp_fwd(d); break;
+ case eltwise_gelu: d = gelu_fwd(d); break;
case eltwise_clamp: d = clamp_fwd(d, elt_alpha, elt_beta); break;
case eltwise_swish: d = swish_fwd(d, elt_alpha); break;
default: assert(!"unknown alg_kind");
list(APPEND ${TARGET}_LINKER_LIBS ${BLAS_LIBRARIES})
elseif (GEMM STREQUAL "MKL")
## enable cblas_gemm from mlkml package
-if(WIN32 OR APPLE)
- detect_mkl("mklml")
-else()
- if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
- detect_mkl("mklml_intel")
+ if(WIN32 OR APPLE)
+ detect_mkl("mklml")
else()
- detect_mkl("mklml_gnu")
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+ detect_mkl("mklml_intel")
+ else()
+ detect_mkl("mklml_gnu")
+ endif()
endif()
-endif()
add_definitions(-DUSE_MKL -DUSE_CBLAS)
include_directories(AFTER ${MKLINC})
list(APPEND ${TARGET}_LINKER_LIBS ${MKLLIB})
"${IE_MAIN_SOURCE_DIR}/thirdparty/movidius/mvnc"
"${CMAKE_BINARY_DIR}/thirdparty/movidius/mvnc")
+ if(ENABLE_TESTS)
+ add_subdirectory(tests)
+ endif()
endif()
msec = __abstime->tv_sec * 1000 + __abstime->tv_nsec / 1000000;
}
-    return SleepConditionVariableCS(&__cond->_cv, __mutex, (DWORD)msec);
+    // Map the Win32 BOOL result onto the pthread contract: 0 on success,
+    // ETIMEDOUT on timeout, otherwise the raw Win32 error code. The old code
+    // returned the BOOL directly (non-zero on *success*), inverting the
+    // pthread success convention.
+    // SleepConditionVariableCS returns bool=true on success.
+    if (SleepConditionVariableCS(&__cond->_cv, __mutex, (DWORD)msec))
+        return 0;
+
+    const int rc = (int)GetLastError();
+    return rc == ERROR_TIMEOUT ? ETIMEDOUT : rc;
}
int pthread_cond_broadcast(pthread_cond_t *__cond)
${LIBUSB_LIBRARY})
endif()
+if(ENABLE_TESTS AND ENABLE_MYRIAD_MVNC_TESTS)
+ add_subdirectory(tests)
+endif()
+
using Devices = std::list<wd_context_as_tuple>;
Devices watchedDevices;
- std::mutex devicesListAcc;
- std::atomic<int> generation = {0};
- std::atomic_bool threadRunning;
+ std::atomic_bool threadRunning {false};
pthread_mutex_t routineLock;
pthread_cond_t wakeUpPingThread;
WatchdogImpl& operator = (const WatchdogImpl&) = delete;
WatchdogImpl& operator = (WatchdogImpl&&) = delete;
+    // Scope guard: runs the supplied callable when the object is destroyed,
+    // guaranteeing cleanup on every exit path (used e.g. to destroy pthread
+    // attributes even when initialization throws). Non-copyable by design.
+    class AutoScope {
+    public:
+        explicit AutoScope(const std::function<void()>& func) : _func(func) {}
+        ~AutoScope() { _func(); }
+
+        AutoScope(const AutoScope&) = delete;
+        AutoScope& operator=(const AutoScope&) = delete;
+    private:
+        std::function<void()> _func;
+    };
+
+
private:
WatchdogImpl() {
if (rc != 0) {
throw std::runtime_error("failed to initialize condition variable attribute. rc: " + std::to_string(rc));
}
+ AutoScope attrDestroy([&attr]{
+ if (pthread_condattr_destroy(&attr) != 0)
+ mvLog(MVLOG_ERROR, "Failed to destroy condition variable attribute.");
+ });
rc = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
if (rc != 0) {
}
private:
+    /// @note: We are using here pthread_cond_timedwait as a replacement for condition_variable::wait_for,
+    /// as libstdc++ has bug not using monotonic clock. When GCC 10.x became minimum supported version,
+    /// that code could be removed.
+    /// @param sleepInterval  relative time to wait for a wake-up signal
+    /// @pre the caller must hold routineLock (POSIX requires the mutex to be
+    ///      locked when calling pthread_cond_timedwait)
+    /// @throws std::runtime_error on any wait error other than timeout
+    void wait_for(const milliseconds sleepInterval) {
+        struct timespec timeToWait = {0, 0};
+
+        const auto sec = std::chrono::duration_cast<std::chrono::seconds>(sleepInterval);
+#if (defined(__APPLE__) || defined(_WIN32))
+        // Apple/Windows paths take a *relative* timespec (see the
+        // pthread_cond_timedwait_relative_np call below).
+        timeToWait.tv_sec = sec.count();
+        timeToWait.tv_nsec =
+            std::chrono::duration_cast<std::chrono::nanoseconds>(sleepInterval).count() -
+            std::chrono::nanoseconds(sec).count();
+#else
+        // Linux path builds an *absolute* CLOCK_MONOTONIC deadline and
+        // normalizes tv_nsec into [0, 1e9).
+        clock_gettime(CLOCK_MONOTONIC, &timeToWait);
+        const auto secondInNanoSeconds = 1000000000L;
+        const auto nsecSum = std::chrono::duration_cast<std::chrono::nanoseconds>(sleepInterval).count() -
+            std::chrono::nanoseconds(sec).count() + timeToWait.tv_nsec;
+        timeToWait.tv_sec += sec.count() + nsecSum / secondInNanoSeconds;
+        timeToWait.tv_nsec = nsecSum % secondInNanoSeconds;
+#endif // (defined(__APPLE__) || defined(_WIN32))
+
+#if defined(__APPLE__)
+        const auto rc = pthread_cond_timedwait_relative_np(&wakeUpPingThread, &routineLock, &timeToWait);
+#else
+        const auto rc = pthread_cond_timedwait(&wakeUpPingThread, &routineLock, &timeToWait);
+#endif // defined(__APPLE__)
+        // ETIMEDOUT is the expected way to leave this wait; anything else
+        // (other than a wake-up) is a real error.
+        if (rc != 0 && rc != ETIMEDOUT) {
+            throw std::runtime_error("Failed to perform wait in a loop for " + std::to_string(sleepInterval.count()) + " ms. rc: " + std::to_string(rc));
+        }
+    }
void watchdog_routine() noexcept {
try {
mvLog(MVLOG_INFO, "thread started\n");
milliseconds sleepInterval;
- struct timespec timeToWait = {0, 0};
+
CustomUniqueLock lock {&routineLock};
do {
}
// TODO: no timer coalescing feature, to minimized thread wakes
sleepInterval = std::get<0>(*minInterval)->dueIn(currentTime);
- mvLog(MVLOG_DEBUG, "sleep interval = %ld ms\n", sleepInterval.count());
-
- auto sec = std::chrono::duration_cast<std::chrono::seconds>(sleepInterval);
-
-#if (defined(__APPLE__) || defined(_WIN32))
- timeToWait.tv_sec = sec.count();
- timeToWait.tv_nsec =
- std::chrono::duration_cast<std::chrono::nanoseconds>(sleepInterval).count() -
- std::chrono::nanoseconds(sec).count();
-#else
- clock_gettime(CLOCK_MONOTONIC, &timeToWait);
- timeToWait.tv_sec += sec.count();
- timeToWait.tv_nsec +=
- std::chrono::duration_cast<std::chrono::nanoseconds>(sleepInterval).count() -
- std::chrono::nanoseconds(sec).count();
-#endif // (defined(__APPLE__) || defined(_WIN32))
-
-#if defined(__APPLE__)
- pthread_cond_timedwait_relative_np(&wakeUpPingThread, &routineLock, &timeToWait);
-#else
- pthread_cond_timedwait(&wakeUpPingThread, &routineLock, &timeToWait);
-#endif // defined(__APPLE__)
+ if (sleepInterval.count() <= 0)
+ continue;
+ mvLog(MVLOG_DEBUG, "sleep interval = %ld ms\n", sleepInterval.count());
+ wait_for(sleepInterval);
mvLog(MVLOG_DEBUG, "waiting completed in %ld ms\n",
duration_cast<std::chrono::milliseconds>(steady_clock::now() - currentTime).count());
--- /dev/null
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set(CMAKE_CXX_STANDARD 11)
+# Fail instead of silently falling back to an older standard.
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+include(${XLINK_DIR}/XLink.cmake)
+
+set(MVNC_TESTS_COMMON_INCLUDE
+        "cases"
+        "helpers"
+        )
+
+# Explicit source list: file(GLOB) over fixed file names gains nothing and
+# silently drops any entry missing on disk, deferring the error to link time.
+set(MVNC_TESTS_COMMON_SOURCES
+        "helpers/mvnc_test_helper.cpp"
+        "cases/mvnc_common_test_cases.cpp")
+
+set(PRIVATE_INCLUDE
+        ${IE_MAIN_SOURCE_DIR}/tests_new/gtest/googletest/include
+        ${IE_MAIN_SOURCE_DIR}/tests_new/gtest/googletest/
+        ${XLINK_INCLUDE}
+        ${XLINK_PLATFORM_INCLUDE}
+        ${WATCHDOG_INCLUDE})
+
+# Declares one gtest-based mvnc test executable with the shared helper
+# sources, include paths, and compile definitions wired in.
+#   TARGET_NAME   - name of the executable target to create
+#   TESTS_SOURCES - list of test sources (pass as a single quoted list)
+function(add_mvnc_test_target TARGET_NAME TESTS_SOURCES)
+    add_executable(${TARGET_NAME} ${TESTS_SOURCES} ${MVNC_TESTS_COMMON_SOURCES})
+
+    target_include_directories(${TARGET_NAME}
+            PUBLIC
+            "../include"
+            PRIVATE
+            ${MVNC_TESTS_COMMON_INCLUDE}
+            ${PRIVATE_INCLUDE})
+
+    if(ENABLE_MYRIAD_NO_BOOT)
+        target_compile_definitions(${TARGET_NAME} PRIVATE NO_BOOT)
+    endif()
+
+    target_compile_definitions(${TARGET_NAME}
+            PRIVATE
+            __PC__)
+
+    # PRIVATE keyword: test executables are leaf targets, nothing links them.
+    target_link_libraries(${TARGET_NAME} PRIVATE gtest gtest_main mvnc)
+    # Build-order-only dependency: tests need the firmware copied next to them.
+    add_dependencies(${TARGET_NAME} vpu_copy_firmware)
+endfunction()
+
+################# MvncTests ###################
+
+if(ENABLE_MYRIAD_NO_BOOT)
+    set(MVNC_TESTS_SOURCES
+            mvnc_no_boot_tests.cpp
+            cases/mvnc_no_boot_test_cases.cpp
+            )
+else()
+    set(MVNC_TESTS_SOURCES
+            mvnc_tests_common.cpp
+            mvnc_tests_usb.cpp
+            cases/mvnc_usb_test_cases.cpp)
+endif()
+
+add_mvnc_test_target("MvncTests" "${MVNC_TESTS_SOURCES}")
+
+################# MvncStressTests ###################
+
+set(MVNC_STRESS_TESTS_SOURCES
+        mvnc_stress_tests.cpp
+        cases/mvnc_stress_test_cases.cpp
+        )
+add_mvnc_test_target("MvncStressTests" "${MVNC_STRESS_TESTS_SOURCES}")
+
+################# MvncUtilsTests ###################
+
+set(MVNC_UTILS_TESTS_SOURCES mvnc_utils_tests.cpp)
+add_mvnc_test_target("MvncUtilsTests" "${MVNC_UTILS_TESTS_SOURCES}")
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mvnc_common_test_cases.h"
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncTestsCommon
+//------------------------------------------------------------------------------
+// Constructor: selects the firmware search path per platform.
+MvncTestsCommon::MvncTestsCommon() {
+#if !(defined(_WIN32) || defined(_WIN64))
+    // On linux we should use custom path to firmware due to another searching mechanism for library
+    strcpy(firmwarePath, "./lib");
+#else
+    firmwarePath[0] = 0;
+#endif
+}
+
+// Per-test setup: init USB boot, apply the log level, count attached devices.
+void MvncTestsCommon::SetUp() {
+    initialize_usb_boot();
+    ASSERT_NO_ERROR(setLogLevel(ncLogLevel));
+    availableDevices_ = getAmountOfDevices();
+}
+
+void MvncTestsCommon::TearDown() {
+    ncDeviceResetAll();
+}
+
+// Applies logLevel globally via ncGlobalSetOption and caches it in
+// ncLogLevel. Returns 0 on success, -1 on failure (warning printed, cached
+// level left unchanged).
+int MvncTestsCommon::setLogLevel(const mvLog_t logLevel) {
+    ncStatus_t status = ncGlobalSetOption(NC_RW_LOG_LEVEL, &logLevel,
+                                          sizeof(logLevel));
+    if (status != NC_OK) {
+        fprintf(stderr,
+                "WARNING: failed to set log level: %d with error: %d\n",
+                ncLogLevel, status);
+        return -1;
+    }
+    ncLogLevel = logLevel;
+    return 0;
+}
+
+// Opens devicesToBoot USB devices into the caller-provided handle array;
+// skips the test (GTEST_SKIP_) when fewer devices are attached.
+void MvncTestsCommon::openDevices(const int devicesToBoot, ncDeviceHandle_t **deviceHandlers,
+                                  int &amountOfBooted) {
+    ASSERT_TRUE(deviceHandlers != nullptr);
+    const int availableDevices = getAmountOfDevices(NC_USB);
+    if (availableDevices < devicesToBoot) {
+        GTEST_SKIP_("Not enough devices");
+    }
+
+    amountOfBooted = 0;
+    ncDeviceDescr_t ncDeviceDesc = {};
+    ncDeviceDesc.protocol = NC_USB;
+    ncDeviceDesc.platform = NC_ANY_PLATFORM;
+
+    for (int index = 0; index < devicesToBoot; ++index) {
+        ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandlers[index], ncDeviceDesc, watchdogInterval, firmwarePath));
+        ASSERT_TRUE(deviceHandlers[index] != nullptr);
+        ++amountOfBooted;
+    }
+    ASSERT_EQ(amountOfBooted, devicesToBoot) << "Not all devices was loaded";
+}
+
+// Boots a single USB device's firmware; PCIe is not bootable and fails fast.
+void MvncTestsCommon::bootOneDevice(ncDeviceProtocol_t deviceProtocol) {
+    if (deviceProtocol == NC_PCIE) {
+        GTEST_FATAL_FAILURE_("Boot doesn't supported for PCIe protocol\n");
+    }
+    ASSERT_NO_ERROR(ncDeviceLoadFirmware(NC_ANY_PLATFORM, firmwarePath));
+}
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncOpenDevice
+//------------------------------------------------------------------------------
+// Re-counts devices for the protocol supplied via the test parameter.
+void MvncOpenDevice::SetUp() {
+    MvncTestsCommon::SetUp();
+
+    _deviceProtocol = GetParam();
+    availableDevices_ = getAmountOfDevices(_deviceProtocol);
+}
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncLoggingTests
+//------------------------------------------------------------------------------
+// Opens every available device and redirects stdout into a buffer so the
+// test can inspect log output.
+void MvncLoggingTests::SetUp() {
+    MvncOpenDevice::SetUp();
+
+    _deviceDesc.protocol = _deviceProtocol;
+    _deviceDesc.platform = NC_ANY_PLATFORM;
+
+    for (int index = 0; index < availableDevices_; ++index) {
+        ASSERT_NO_ERROR(ncDeviceOpen(&_deviceHandles[index], _deviceDesc, watchdogInterval, firmwarePath));
+    }
+
+    setbuf(stdout, buff);
+    fprintf(stdout, "[workaround for getting full content from XLink]\n");
+}
+
+// Restores stdout buffering and closes all devices opened in SetUp.
+void MvncLoggingTests::TearDown() {
+    setbuf(stdout, NULL);
+    for (int index = 0; index < availableDevices_; ++index) {
+        ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandles[index]));
+    }
+}
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncGraphAllocations
+//------------------------------------------------------------------------------
+// Reads the graph blob from disk; a missing blob is reported but does not
+// fail setup (tests check blobLoaded themselves).
+void MvncGraphAllocations::SetUp() {
+    MvncOpenDevice::SetUp();
+
+    // Load blob
+    blobLoaded = readBINFile(blobPath, _blob);
+    if (!blobLoaded) {
+        std::cout << blobPath << " blob for test not found\n";
+    }
+}
+
+// Closes only the devices that were actually booted during the test.
+void MvncGraphAllocations::TearDown() {
+    for (int index = 0; index < _bootedDevices; ++index) {
+        ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandle[index]));
+    }
+    _bootedDevices = 0;
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#if (defined(_WIN32) || defined(_WIN64))
+#include "windows.h"
+#endif
+
+#include <gtest/gtest.h>
+#include <chrono>
+
+#include "mvnc.h"
+#include "mvnc_ext.h"
+#include "XLinkLog.h"
+#include "mvnc_test_helper.h"
+
+//------------------------------------------------------------------------------
+// Helpers
+//------------------------------------------------------------------------------
+// User-defined literal so timeouts can be written as e.g. 5_sec.
+constexpr std::chrono::seconds operator "" _sec(unsigned long long s)
+{
+ return std::chrono::seconds(s);
+}
+
+//------------------------------------------------------------------------------
+// class MvncTestsCommon
+//------------------------------------------------------------------------------
+// Base fixture for all mvnc tests: holds the firmware path, log level,
+// watchdog interval and the device count established during SetUp().
+class MvncTestsCommon : public ::testing::Test {
+public:
+ char firmwarePath[MAX_PATH] = {};
+ mvLog_t ncLogLevel = MVLOG_INFO;
+ int watchdogInterval = 1000;
+ int availableDevices_ = 0;
+
+ ~MvncTestsCommon() override = default;
+ MvncTestsCommon();
+protected:
+
+ void SetUp() override;
+ void TearDown() override;
+
+public:
+ // Apply the given mvnc log level; returns 0 on success.
+ int setLogLevel(const mvLog_t logLevel);
+
+ /**
+ * @brief Boot and open the selected amount of devices
+ * @param[in] devicesToBoot Requested number of devices to boot
+ * @param[out] deviceHandlers Pre-allocated array for handlers
+ * @param[out] amountOfBooted Amount of devices which were actually booted
+ */
+ void openDevices(const int devicesToBoot, ncDeviceHandle_t** deviceHandlers,
+ int& amountOfBooted);
+
+ /**
+ * @brief Load firmware to a device
+ * @warning Only USB devices are supported; NC_PCIE triggers a fatal failure
+ */
+ virtual void bootOneDevice(ncDeviceProtocol_t deviceProtocol= NC_USB);
+};
+
+//------------------------------------------------------------------------------
+// class MvncOpenDevice
+//------------------------------------------------------------------------------
+// Fixture parametrized by device protocol (USB / PCIe); SetUp() records the
+// parameter in _deviceProtocol and counts matching devices.
+class MvncOpenDevice : public MvncTestsCommon,
+ public testing::WithParamInterface<ncDeviceProtocol_t> {
+protected:
+ ncDeviceProtocol_t _deviceProtocol = NC_ANY_PROTOCOL;
+
+ ~MvncOpenDevice() override = default;
+ void SetUp() override;
+
+};
+
+//------------------------------------------------------------------------------
+// class MvncLoggingTests
+//------------------------------------------------------------------------------
+// Fixture that opens all devices for the protocol under test and captures
+// stdout into `buff` so log output can be inspected.
+class MvncLoggingTests : public MvncOpenDevice {
+public:
+ // Receives buffered stdout content (installed via setbuf in SetUp).
+ char buff[BUFSIZ] = {};
+protected:
+ ncDeviceHandle_t * _deviceHandles[MAX_DEVICES] = {nullptr};
+ ncDeviceDescr_t _deviceDesc = {};
+
+ void SetUp() override;
+ void TearDown() override;
+ ~MvncLoggingTests() override = default;
+};
+
+//------------------------------------------------------------------------------
+// class MvncGraphAllocations
+//------------------------------------------------------------------------------
+/**
+ * @brief Test transfer of data from host to device
+ * @details Allocates devices and exercises graph-allocation cases
+ * @warning For correct testing a blob with size more than 30mb should be used
+ */
+class MvncGraphAllocations: public MvncOpenDevice {
+public:
+ // Devices
+ ncDeviceHandle_t * _deviceHandle[MAX_DEVICES] = {nullptr};
+ // Number of devices booted by the current test; reset in TearDown().
+ int _bootedDevices = 0;
+
+ // Graphs
+ ncGraphHandle_t* _graphHandle[MAX_DEVICES] = {nullptr};
+
+ // Blob
+ const std::string blobPath = "bvlc_googlenet_fp16.blob";
+ std::vector<char> _blob;
+ // Set by SetUp(); tests skip when the blob file is not present.
+ bool blobLoaded = false;
+
+protected:
+ void SetUp() override;
+ void TearDown() override;
+ ~MvncGraphAllocations() override = default;
+};
+
+//------------------------------------------------------------------------------
+// class MvncCloseDevice
+//------------------------------------------------------------------------------
+// Fixture for device-closing tests; adds no state beyond the common base.
+class MvncCloseDevice : public MvncTestsCommon {
+protected:
+ ~MvncCloseDevice() override = default;
+};
+
+//------------------------------------------------------------------------------
+// Parametric tests initialization
+//------------------------------------------------------------------------------
+// Protocol values used to instantiate parametrized tests.
+static const std::vector<ncDeviceProtocol_t> myriadProtocols = {
+ NC_USB,
+ NC_PCIE
+};
+
+// USB platform values used to instantiate parametrized tests.
+static const std::vector<ncDevicePlatform_t> myriadPlatforms = {
+ NC_MYRIAD_2,
+ NC_MYRIAD_X
+};
+
+
+namespace {
+ /**
+ * @brief Converter from enum to string; used by gtest to name each
+ * instantiated parametrized test.
+ */
+ struct PrintToStringParamName {
+ std::string operator()(
+ const ::testing::TestParamInfo<ncDeviceProtocol_t> &info) const {
+ return ncProtocolToStr(info.param);
+ }
+
+ std::string operator()(
+ const ::testing::TestParamInfo<ncDevicePlatform_t> &info) const {
+ return std::string("USB_") + ncPlatformToStr(info.param);
+ }
+ };
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mvnc_no_boot_test_cases.h"
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncNoBootTests
+//------------------------------------------------------------------------------
+// Boot a single USB device, but only if no booted device is present yet.
+void MvncNoBootTests::bootOneDevice() {
+ // In case already booted device exist, do nothing
+ if (getAmountOfBootedDevices() == 0) {
+ MvncTestsCommon::bootOneDevice(NC_USB);
+ }
+}
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncNoBootOpenDevice
+//------------------------------------------------------------------------------
+// Count USB devices, require at least one, and pre-boot firmware:
+// NO_BOOT builds expect an already-booted device before ncDeviceOpen.
+void MvncNoBootOpenDevice::SetUp() {
+ MvncNoBootTests::SetUp();
+ available_devices = getAmountOfDevices(NC_USB);
+ ASSERT_TRUE(available_devices > 0);
+
+ // With NO_BOOT option we should boot device with firmware before trying to open
+ bootOneDevice();
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "mvnc_common_test_cases.h"
+
+//------------------------------------------------------------------------------
+// class MvncNoBootTests
+//------------------------------------------------------------------------------
+// Base fixture for the NO_BOOT flavour of the tests.
+class MvncNoBootTests: public MvncTestsCommon {
+public:
+ // NOTE(review): this parameterless overload hides (does not override)
+ // the base bootOneDevice(ncDeviceProtocol_t); it boots a USB device
+ // only when none is booted yet.
+ void bootOneDevice();
+protected:
+ ~MvncNoBootTests() override = default;
+};
+
+//------------------------------------------------------------------------------
+// class MvncNoBootOpenDevice
+//------------------------------------------------------------------------------
+// NO_BOOT fixture that requires at least one USB device and pre-boots it.
+class MvncNoBootOpenDevice : public MvncNoBootTests {
+public:
+ // USB device count established in SetUp().
+ int available_devices = 0;
+protected:
+ ~MvncNoBootOpenDevice() override = default;
+ void SetUp() override;
+};
+
+//------------------------------------------------------------------------------
+// class MvncNoBootCloseDevice
+//------------------------------------------------------------------------------
+// NO_BOOT fixture for device-closing tests; no extra state.
+class MvncNoBootCloseDevice : public MvncNoBootTests {
+protected:
+ ~MvncNoBootCloseDevice() override = default;
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mvnc_stress_test_cases.h"
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncStressTests
+//------------------------------------------------------------------------------
+// Resolve the protocol parameter, require at least one matching device,
+// reduce log verbosity, and (in NO_BOOT builds) pre-boot one USB device.
+void MvncStressTests::SetUp() {
+ MvncTestsCommon::SetUp();
+
+ _deviceProtocol = GetParam();
+ available_devices = getAmountOfDevices(_deviceProtocol);
+ ASSERT_TRUE(available_devices > 0) << ncProtocolToStr(_deviceProtocol)
+ << " devices not found";
+ ASSERT_NO_ERROR(setLogLevel(MVLOG_WARN));
+
+#ifdef NO_BOOT
+ // In case already booted device exist, do nothing
+ if (getAmountOfBootedDevices() == 0) {
+ MvncTestsCommon::bootOneDevice(NC_USB);
+ }
+#endif
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "mvnc_common_test_cases.h"
+
+//------------------------------------------------------------------------------
+// class MvncStressTests
+//------------------------------------------------------------------------------
+// Fixture for long-running open/close and inference stress tests,
+// parametrized by device protocol.
+class MvncStressTests : public MvncTestsCommon,
+ public testing::WithParamInterface<ncDeviceProtocol_t>{
+public:
+ // Devices matching the protocol under test, counted in SetUp().
+ int available_devices = 0;
+
+protected:
+ ~MvncStressTests() override = default;
+ void SetUp() override;
+
+ ncDeviceProtocol_t _deviceProtocol = NC_ANY_PROTOCOL;
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mvnc_usb_test_cases.h"
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncOpenUSBDevice
+//------------------------------------------------------------------------------
+// Reset all devices first so the test starts from an unbooted state, then
+// count unbooted USB devices and prepare a generic USB device descriptor.
+void MvncOpenUSBDevice::SetUp() {
+ ncDeviceResetAll();
+ MvncTestsCommon::SetUp();
+
+ availableDevices_ = getAmountOfNotBootedDevices(NC_USB);
+
+ deviceDesc_.protocol = NC_USB;
+ deviceDesc_.platform = NC_ANY_PLATFORM;
+}
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncDevicePlatform
+//------------------------------------------------------------------------------
+// Count devices per platform and narrow the descriptor to the platform
+// under test (gtest parameter).
+void MvncDevicePlatform::SetUp() {
+ MvncOpenUSBDevice::SetUp();
+
+ available_myriadX_ = getAmountOfMyriadXDevices(NC_USB);
+ available_myriad2_ = getAmountOfMyriad2Devices(NC_USB);
+
+ devicePlatform_ = GetParam();
+ deviceDesc_.platform = devicePlatform_;
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "mvnc_common_test_cases.h"
+
+//------------------------------------------------------------------------------
+// class MvncOpenUSBDevice
+//------------------------------------------------------------------------------
+// Fixture for USB-only open tests; SetUp() resets devices and prepares a
+// USB descriptor with any platform.
+class MvncOpenUSBDevice : public MvncTestsCommon {
+public:
+ ncDeviceHandle_t* deviceHandle_ = nullptr;
+ ncDeviceDescr_t deviceDesc_ = {};
+
+ ~MvncOpenUSBDevice() override = default;
+
+protected:
+ void SetUp() override;
+};
+
+//------------------------------------------------------------------------------
+// class MvncCloseUSBDevice
+//------------------------------------------------------------------------------
+// Fixture alias for USB device-closing tests; behavior inherited unchanged.
+class MvncCloseUSBDevice : public MvncOpenUSBDevice {
+};
+
+//------------------------------------------------------------------------------
+// class MvncDevicePlatform
+//------------------------------------------------------------------------------
+// USB fixture parametrized by Myriad platform (Myriad 2 / Myriad X).
+class MvncDevicePlatform : public MvncOpenUSBDevice,
+ public testing::WithParamInterface<ncDevicePlatform_t>{
+public:
+ long available_myriadX_ = 0;
+ long available_myriad2_ = 0;
+ // Assigned from the test parameter in SetUp(); default-initialized so the
+ // member is never read uninitialized if SetUp() fails early.
+ ncDevicePlatform_t devicePlatform_ = NC_ANY_PLATFORM;
+
+ ~MvncDevicePlatform() override = default;
+
+protected:
+ void SetUp() override;
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <iostream>
+#include "mvnc_data.h"
+#include "mvnc_test_helper.h"
+
+//------------------------------------------------------------------------------
+// Implementations of helpers - counters
+//------------------------------------------------------------------------------
+/**
+ * @brief Count connected devices matching protocol, platform and XLink state
+ * @note The result fits in an int because the search is bounded by
+ *       NC_MAX_DEVICES.
+ */
+int getAmountOfDevices(const ncDeviceProtocol_t deviceProtocol,
+ const ncDevicePlatform_t devicePlatform,
+ const XLinkDeviceState_t state) {
+ deviceDesc_t req_deviceDesc = {};
+ req_deviceDesc.protocol = convertProtocolToXlink(deviceProtocol);
+ req_deviceDesc.platform = convertPlatformToXlink(devicePlatform);
+
+ deviceDesc_t deviceDescArray[NC_MAX_DEVICES] = {};
+ unsigned int foundDevices = 0;
+ XLinkFindAllSuitableDevices(
+ state, req_deviceDesc, deviceDescArray, NC_MAX_DEVICES, &foundDevices);
+
+ return foundDevices;
+}
+
+// Count connected Myriad X devices for the given protocol.
+long getAmountOfMyriadXDevices(ncDeviceProtocol_t deviceProtocol) {
+ return getAmountOfDevices(deviceProtocol, NC_MYRIAD_X);
+}
+
+// Count connected Myriad 2 devices for the given protocol.
+long getAmountOfMyriad2Devices(ncDeviceProtocol_t deviceProtocol) {
+ return getAmountOfDevices(deviceProtocol, NC_MYRIAD_2);
+}
+
+// Count devices already booted with firmware (any platform).
+long getAmountOfBootedDevices(ncDeviceProtocol_t deviceProtocol) {
+ return getAmountOfDevices(deviceProtocol, NC_ANY_PLATFORM, X_LINK_BOOTED);
+}
+
+// Count devices that have not been booted yet (any platform).
+long getAmountOfNotBootedDevices(ncDeviceProtocol_t deviceProtocol) {
+ return getAmountOfDevices(deviceProtocol, NC_ANY_PLATFORM, X_LINK_UNBOOTED);
+}
+
+// Count connected PCIe devices, any platform and state.
+long getAmountOfPCIeDevices() {
+ return getAmountOfDevices(NC_PCIE);
+}
+
+// Count connected USB devices, any platform and state.
+long getAmountOfUSBDevices() {
+ return getAmountOfDevices(NC_USB);
+}
+
+//------------------------------------------------------------------------------
+// Implementations of helpers - get devices
+//------------------------------------------------------------------------------
+/**
+ * @brief Get the names of all connected Myriad devices matching the
+ *        given protocol, platform and XLink state
+ */
+std::vector<std::string> getDevicesList(const ncDeviceProtocol_t deviceProtocol,
+ const ncDevicePlatform_t devicePlatform,
+ const XLinkDeviceState_t state) {
+
+ deviceDesc_t req_deviceDesc = {};
+ req_deviceDesc.protocol = convertProtocolToXlink(deviceProtocol);
+ req_deviceDesc.platform = convertPlatformToXlink(devicePlatform);
+
+ deviceDesc_t deviceDescArray[NC_MAX_DEVICES] = {};
+ unsigned int foundDevices = 0;
+ XLinkFindAllSuitableDevices(
+ state, req_deviceDesc, deviceDescArray, NC_MAX_DEVICES, &foundDevices);
+
+ std::vector<std::string> devNames;
+ devNames.reserve(foundDevices);
+ // Unsigned index matches foundDevices and avoids a signed/unsigned
+ // comparison warning present in the original loop.
+ for (unsigned int i = 0; i < foundDevices; ++i) {
+ devNames.emplace_back(deviceDescArray[i].name);
+ }
+
+ return devNames;
+}
+
+//------------------------------------------------------------------------------
+// Implementation of helpers - comparators
+//------------------------------------------------------------------------------
+// True when the name contains the Myriad X USB marker ("ma2480").
+bool isMyriadXUSBDevice(const std::string &deviceName) {
+ return (deviceName.find(MYRIAD_X_NAME_STR) != std::string::npos);
+}
+
+// True when the name contains the Myriad 2 USB marker ("ma2450").
+bool isMyriad2USBDevice(const std::string &deviceName) {
+ return (deviceName.find(MYRIAD_2_NAME_STR) != std::string::npos);
+}
+
+// True when the name contains the platform-specific PCIe marker.
+bool isMyriadPCIeDevice(const std::string &deviceName) {
+ return deviceName.find(std::string(PCIE_NAME_STR)) != std::string::npos;
+}
+
+// True for any Myriad USB device name, including already-booted devices
+// (which lose their platform marker — see isMyriadBootedUSBDevice).
+bool isMyriadUSBDevice(const std::string &deviceName) {
+ return (isMyriad2USBDevice(deviceName)
+ || isMyriadXUSBDevice(deviceName)
+ || isMyriadBootedUSBDevice(deviceName));
+}
+
+// NOTE(review): booted USB devices are detected only by elimination — any
+// name lacking the Myriad2/MyriadX/PCIe markers is treated as a booted USB
+// device, so unrelated names would also match. Confirm against XLink naming.
+bool isMyriadBootedUSBDevice(const std::string &deviceName) {
+ return (!isMyriad2USBDevice(deviceName) &&
+ !isMyriadXUSBDevice(deviceName) &&
+ !isMyriadPCIeDevice(deviceName));
+}
+
+/**
+ * @brief Check that the device name matches the expected protocol.
+ */
+bool isSameProtocolDevice(const std::string &deviceName, const ncDeviceProtocol_t expectedProtocol) {
+ if (expectedProtocol == NC_USB) {
+ return isMyriadUSBDevice(deviceName);
+ }
+ if (expectedProtocol == NC_PCIE) {
+ return isMyriadPCIeDevice(deviceName);
+ }
+ if (expectedProtocol == NC_ANY_PROTOCOL) {
+ return isMyriadPCIeDevice(deviceName) || isMyriadUSBDevice(deviceName);
+ }
+ std::cout << "Unknown device protocol" << std::endl;
+ return false;
+}
+
+/**
+ * @brief Check that the USB device name matches the expected platform.
+ */
+bool
+isSamePlatformUSBDevice(const std::string &deviceName, const ncDevicePlatform_t expectedPlatform) {
+ if (expectedPlatform == NC_MYRIAD_2) {
+ return isMyriad2USBDevice(deviceName);
+ }
+ if (expectedPlatform == NC_MYRIAD_X) {
+ return isMyriadXUSBDevice(deviceName);
+ }
+ if (expectedPlatform == NC_ANY_PLATFORM) {
+ return isMyriad2USBDevice(deviceName) || isMyriadXUSBDevice(deviceName);
+ }
+ std::cout << "Unknown device platform" << std::endl;
+ return false;
+}
+
+//------------------------------------------------------------------------------
+// Implementation of helpers - file loader
+//------------------------------------------------------------------------------
+/**
+ * @brief Read a whole binary file into a byte buffer
+ * @param fileName Path to the file
+ * @param[out] buf Resized to the file size and filled with its content
+ * @return True only if the file was opened and fully read
+ */
+bool readBINFile(const std::string &fileName, std::vector<char> &buf) {
+ std::ifstream file(fileName, std::ios_base::binary | std::ios_base::ate);
+ if (file.fail()) {
+ std::cout << "Can't open file!" << std::endl;
+ return false;
+ }
+ // "ate" positions the stream at the end, so tellg() yields the file size.
+ const std::streampos fileSize = file.tellg();
+ if (fileSize < 0) {
+ // Original code would cast -1 to a huge size and try to allocate it.
+ return false;
+ }
+ buf.resize(static_cast<size_t>(fileSize));
+ file.seekg(0);
+ file.read(buf.data(), static_cast<std::streamsize>(buf.size()));
+ // Report failure when the content could not be read completely.
+ return !file.fail();
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <string>
+#include <fstream>
+
+#include "XLink.h"
+#include "mvnc.h"
+
+//------------------------------------------------------------------------------
+// Macros
+//------------------------------------------------------------------------------
+// 0 is the mvnc success code, so any non-zero result counts as an error.
+#define ASSERT_NO_ERROR(call) ASSERT_EQ(call, 0)
+#define ASSERT_ERROR(call) ASSERT_TRUE(call)
+
+
+//------------------------------------------------------------------------------
+// Defines
+//------------------------------------------------------------------------------
+// USB name markers for the two Myriad platforms.
+#define MYRIAD_X_NAME_STR "ma2480"
+#define MYRIAD_2_NAME_STR "ma2450"
+
+// The PCIe device name marker differs between Windows and other systems.
+#if (defined(_WIN32) || defined(_WIN64))
+#define PCIE_NAME_STR "mxlink"
+#else
+#define PCIE_NAME_STR "mxlk"
+#endif
+
+const int MAX_DEVICES = 32;
+const int MAX_DEV_NAME = 20;
+
+// Windows defines MAX_PATH in windows.h; provide a fallback elsewhere.
+#ifndef MAX_PATH
+const int MAX_PATH = 255;
+#endif
+
+//------------------------------------------------------------------------------
+// Usb initialization
+//------------------------------------------------------------------------------
+// Without this initialization find device on windows could not work
+#if (defined(_WIN32) || defined(_WIN64) )
+extern "C" void initialize_usb_boot();
+#else
+#define initialize_usb_boot()
+#endif
+
+
+//------------------------------------------------------------------------------
+// Helpers - counters
+//------------------------------------------------------------------------------
+/**
+ * @brief Get amount of all currently connected Myriad devices
+ * @param[in] deviceProtocol Count only protocol specific devices
+ * @param[in] devicePlatform Count only platform specific devices
+ * @param[in] state Count only devices in the given XLink state
+ */
+int getAmountOfDevices(const ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL,
+ const ncDevicePlatform_t devicePlatform = NC_ANY_PLATFORM,
+ const XLinkDeviceState_t state = X_LINK_ANY_STATE);
+
+long getAmountOfMyriadXDevices(ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL);
+
+long getAmountOfMyriad2Devices(ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL);
+
+long getAmountOfBootedDevices(ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL);
+
+long getAmountOfNotBootedDevices(ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL);
+
+long getAmountOfPCIeDevices();
+
+long getAmountOfUSBDevices();
+
+//------------------------------------------------------------------------------
+// Helpers - get devices
+//------------------------------------------------------------------------------
+/**
+ * @brief Get list of all currently connected Myriad devices
+ */
+std::vector<std::string> getDevicesList(
+ const ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL,
+ const ncDevicePlatform_t devicePlatform = NC_ANY_PLATFORM,
+ const XLinkDeviceState_t state = X_LINK_ANY_STATE);
+
+//------------------------------------------------------------------------------
+// Helpers - comparators
+//------------------------------------------------------------------------------
+bool isMyriadXUSBDevice(const std::string &deviceName);
+
+bool isMyriad2USBDevice(const std::string &deviceName);
+
+bool isMyriadPCIeDevice(const std::string& deviceName);
+
+/**
+ * @warning The booted USB device will also be counted here.
+ */
+bool isMyriadUSBDevice(const std::string& deviceName);
+
+bool isMyriadBootedUSBDevice(const std::string &deviceName);
+
+/**
+ * @brief Check that device matches the specified protocol
+ */
+bool isSameProtocolDevice(const std::string &deviceName,
+ const ncDeviceProtocol_t expectedProtocol);
+
+/**
+* @brief Check that device matches the specified platform for USB
+*/
+bool isSamePlatformUSBDevice(const std::string &deviceName,
+ const ncDevicePlatform_t expectedPlatform);
+
+//------------------------------------------------------------------------------
+// Helpers - file loader
+//------------------------------------------------------------------------------
+/**
+ * @brief Read blob
+ * @param fileName Path to blob from bin directory
+ * @return True if blob is read without problem
+ */
+bool readBINFile(const std::string& fileName, std::vector<char>& buf);
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mvnc.h"
+#include "mvnc_no_boot_test_cases.h"
+
+//------------------------------------------------------------------------------
+// MvncNoBootOpenDevice Tests
+//------------------------------------------------------------------------------
+/**
+* @brief Open any device and close it.
+* @note The fixture pre-boots a USB device in SetUp(), as NO_BOOT builds
+*       require an already-booted device before ncDeviceOpen.
+*/
+TEST_F(MvncNoBootOpenDevice, OpenAndClose) {
+ ncDeviceHandle_t *deviceHandle = nullptr;
+ ncDeviceDescr_t deviceDesc = {};
+ deviceDesc.protocol = NC_USB;
+ deviceDesc.platform = NC_ANY_PLATFORM;
+
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+}
+
+/**
+* @brief Try to open device twice. DeviceHandle shouldn't be overwritten
+*/
+TEST_F(MvncNoBootOpenDevice, OpenTwiceSameHandler) {
+ ncDeviceHandle_t *handle = nullptr;
+ ncDeviceDescr_t descr = {};
+ descr.protocol = NC_USB;
+ descr.platform = NC_ANY_PLATFORM;
+
+ char firstDeviceName[MAX_DEV_NAME];
+ char secondDeviceName[MAX_DEV_NAME];
+ unsigned int firstNameLength = MAX_DEV_NAME;
+ unsigned int secondNameLength = MAX_DEV_NAME;
+
+ // Open the device and remember its reported name.
+ ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceGetOption(handle, NC_RO_DEVICE_NAME,
+ firstDeviceName, &firstNameLength));
+
+ // Open again with the same handle pointer and read the name once more.
+ ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceGetOption(handle, NC_RO_DEVICE_NAME,
+ secondDeviceName, &secondNameLength));
+
+ ASSERT_NO_ERROR(ncDeviceClose(&handle));
+ // Both opens must refer to the same physical device.
+ ASSERT_STREQ(firstDeviceName, secondDeviceName);
+}
+
+
+/**
+ * @brief Open device twice, one run after another; checks that the link to
+ *        the device is closed correctly between runs
+ * @note Mostly important for PCIe and connect-to-booted modes, where
+ *       XLinkReset behaves differently
+ */
+TEST_F(MvncNoBootOpenDevice, OpenDeviceWithOneXLinkInitializion) {
+ ncDeviceHandle_t *deviceHandle = nullptr;
+ ncDeviceDescr_t deviceDesc = {};
+ deviceDesc.protocol = NC_USB;
+ deviceDesc.platform = NC_ANY_PLATFORM;
+
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+
+ // Second open
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+
+}
+
+//------------------------------------------------------------------------------
+// MvncNoBootCloseDevice Tests
+//------------------------------------------------------------------------------
+/**
+* @brief Closing a null device handle must succeed (be a no-op)
+*/
+TEST_F(MvncNoBootCloseDevice, EmptyDeviceHandler) {
+ ncDeviceHandle_t *deviceHandle = nullptr;
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+}
+
+/**
+* @brief Device, which was booted before open, shouldn't reboot after
+* ncDeviceClose call — the booted-device count must stay unchanged
+*/
+TEST_F(MvncNoBootCloseDevice, AlreadyBootedDeviceWillNotReboot) {
+ bootOneDevice();
+
+ ASSERT_EQ(getAmountOfBootedDevices(), 1);
+
+ ncDeviceHandle_t *deviceHandle = nullptr;
+ ncDeviceDescr_t deviceDesc = {};
+ deviceDesc.protocol = NC_USB;
+ deviceDesc.platform = NC_ANY_PLATFORM;
+
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+
+ // The device must still be booted after close (no reboot).
+ ASSERT_EQ(getAmountOfBootedDevices(), 1);
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include "mvnc.h"
+#include "ncPrivateTypes.h"
+#include "mvnc_stress_test_cases.h"
+
+//------------------------------------------------------------------------------
+// MvncStressTests Tests
+//------------------------------------------------------------------------------
+/**
+* @brief Open and close device for 1001 times
+*/
+TEST_P(MvncStressTests, OpenClose1001) {
+ const int iterations = 1001;
+ ncDeviceHandle_t *deviceHandle = nullptr;
+ ncDeviceDescr_t deviceDesc = {};
+ deviceDesc.protocol = _deviceProtocol;
+ deviceDesc.platform = NC_ANY_PLATFORM;
+
+ for (int i = 0; i < iterations; ++i) {
+ printf("Iteration %d of %d\n", i, iterations);
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+ // Reset the handle so the next iteration performs a fresh open.
+ deviceHandle = nullptr;
+ }
+}
+
+/**
+* @brief Allocate and deallocate graph on device for 1001 times
+*/
+TEST_P(MvncStressTests, AllocateDeallocateGraph1001) {
+ const int iterations = 1001;
+ ncDeviceDescr_t deviceDesc = {};
+ deviceDesc.protocol = _deviceProtocol;
+ deviceDesc.platform = NC_ANY_PLATFORM;
+
+ // Load graph
+ const std::string blobPath = "bvlc_googlenet_fp16.blob";
+ std::vector<char> _blob;
+
+ if (!readBINFile(blobPath, _blob)) GTEST_SKIP_("Blob not found\n");
+
+ // Open device
+ ncDeviceHandle_t *deviceHandle = nullptr;
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+
+ for (int i = 0; i < iterations; ++i) {
+ printf("Iteration %d of %d\n", i, iterations);
+
+ // Create graph handlers
+ ncGraphHandle_t* graphHandle = nullptr;
+ std::string graphName = "graph";
+
+ ASSERT_NO_ERROR(ncGraphCreate(graphName.c_str(), &graphHandle));
+ ASSERT_TRUE(graphHandle != nullptr);
+
+ // Allocate graph; the blob doubles as its own header for size purposes.
+ ASSERT_NO_ERROR(ncGraphAllocate(deviceHandle, graphHandle,
+ _blob.data(), _blob.size(), // Blob
+ _blob.data(), sizeof(ElfN_Ehdr) + sizeof(blob_header_v2)) ); // Header
+
+ // Destroy graph
+ ASSERT_NO_ERROR(ncGraphDestroy(&graphHandle));
+ }
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+}
+
+
+/**
+* @brief Run the full cycle of inference 101 times.
+* It includes opening device, allocating graph and fifos, inference,
+ * destroying graph and fifos, closing device
+*/
+TEST_P(MvncStressTests, FullCycleOfWork101Times) {
+ const int iterations = 101;
+ ncDeviceDescr_t deviceDesc = {};
+ deviceDesc.protocol = _deviceProtocol;
+ deviceDesc.platform = NC_ANY_PLATFORM;
+
+ const std::string blobPath = "bvlc_googlenet_fp16.blob";
+ std::vector<char> blob;
+ if (!readBINFile(blobPath, blob)) GTEST_SKIP_("Blob not found\n");
+
+ for (int i = 0; i < iterations; i++) {
+ ncDeviceHandle_t *deviceHandle = nullptr;
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+
+ ncGraphHandle_t* graphHandle = nullptr;
+ std::string graphName = "graph";
+ ASSERT_NO_ERROR(ncGraphCreate(graphName.c_str(), &graphHandle));
+ ASSERT_TRUE(graphHandle != nullptr);
+
+ ASSERT_NO_ERROR(ncGraphAllocate(deviceHandle, graphHandle,
+ blob.data(), blob.size(), // Blob
+ blob.data(), sizeof(ElfN_Ehdr) + sizeof(blob_header_v2) ));
+
+
+ // Query input/output counts and tensor descriptors from the graph.
+ unsigned int dataLength = sizeof(int);
+
+ int numInputs = 0;
+ ASSERT_NO_ERROR(ncGraphGetOption(graphHandle, NC_RO_GRAPH_INPUT_COUNT, &numInputs, &dataLength));
+
+ int numOutputs = 0;
+ ASSERT_NO_ERROR(ncGraphGetOption(graphHandle, NC_RO_GRAPH_OUTPUT_COUNT, &numOutputs, &dataLength));
+
+ dataLength = sizeof(ncTensorDescriptor_t);
+
+ ncTensorDescriptor_t inputDesc = {};
+ ASSERT_NO_ERROR(ncGraphGetOption(graphHandle, NC_RO_GRAPH_INPUT_TENSOR_DESCRIPTORS, &inputDesc,
+ &dataLength));
+
+
+ ncTensorDescriptor_t outputDesc = {};
+ ASSERT_NO_ERROR(ncGraphGetOption(graphHandle, NC_RO_GRAPH_OUTPUT_TENSOR_DESCRIPTORS, &outputDesc,
+ &dataLength));
+
+ unsigned int fifo_elements = 4;
+
+ ncFifoHandle_t *inputFifoHandle = nullptr;
+ ASSERT_NO_ERROR(ncFifoCreate("input", NC_FIFO_HOST_WO, &inputFifoHandle));
+
+ ASSERT_NO_ERROR(ncFifoAllocate(inputFifoHandle, deviceHandle, &inputDesc, fifo_elements));
+
+ ncFifoHandle_t *outputFifoHandle = nullptr;
+ ASSERT_NO_ERROR(ncFifoCreate("output", NC_FIFO_HOST_RO, &outputFifoHandle));
+
+ ASSERT_NO_ERROR(ncFifoAllocate(outputFifoHandle, deviceHandle, &outputDesc, fifo_elements));
+
+ // std::vector instead of raw new/delete: the original leaked both buffers
+ // whenever any of the ASSERT_* below returned early from the test body.
+ std::vector<uint8_t> input_data(inputDesc.totalSize);
+ std::vector<uint8_t> result_data(outputDesc.totalSize);
+ ASSERT_NO_ERROR(ncGraphQueueInferenceWithFifoElem(graphHandle,
+ inputFifoHandle, outputFifoHandle,
+ input_data.data(), &inputDesc.totalSize, nullptr));
+
+ void *userParam = nullptr;
+ ASSERT_NO_ERROR(ncFifoReadElem(outputFifoHandle, result_data.data(), &outputDesc.totalSize, &userParam));
+
+ ASSERT_NO_ERROR(ncFifoDestroy(&inputFifoHandle));
+ ASSERT_NO_ERROR(ncFifoDestroy(&outputFifoHandle));
+
+ ASSERT_NO_ERROR(ncGraphDestroy(&graphHandle));
+
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+ }
+
+}
+
+// Instantiate the stress tests once per supported protocol.
+// NOTE(review): INSTANTIATE_TEST_CASE_P is deprecated in newer GoogleTest in
+// favour of INSTANTIATE_TEST_SUITE_P; keep while the bundled gtest needs it.
+INSTANTIATE_TEST_CASE_P(MvncTestsCommon,
+ MvncStressTests,
+ ::testing::ValuesIn(myriadProtocols),
+ PrintToStringParamName());
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <thread>
+#include "mvnc.h"
+#include "ncPrivateTypes.h"
+#include "mvnc_common_test_cases.h"
+
+//------------------------------------------------------------------------------
+// MvncTestsCommon Tests
+// Platform independent tests
+//------------------------------------------------------------------------------
+// ncAvailableDevices() must agree with XLink's own count of unbooted devices
+// and must not write past the reported count.
+TEST_F(MvncTestsCommon, DoubleCheckOfAvailableDevicesCount) {
+ if (availableDevices_ == 0)
+ GTEST_SKIP();
+
+ const int min_name_size = 2;
+
+ struct ncDeviceDescr_t act_devices[NC_MAX_DEVICES] = {};
+ int act_devicesCount = 0;
+ int exp_devicesCount = getAmountOfNotBootedDevices();
+
+ ASSERT_NO_ERROR(ncAvailableDevices(act_devices, NC_MAX_DEVICES, &act_devicesCount));
+
+ ASSERT_TRUE(act_devicesCount);
+ ASSERT_EQ(act_devicesCount, exp_devicesCount);
+
+ // Every reported device carries a plausible name...
+ for (int i = 0; i < act_devicesCount; ++i) {
+ ASSERT_GE(strlen(act_devices[i].name), min_name_size);
+ }
+
+ // ...and entries past the reported count stay zero-initialized.
+ for (int j = act_devicesCount; j < NC_MAX_DEVICES; ++j) {
+ ASSERT_EQ(strlen(act_devices[j].name), 0);
+ }
+}
+
+// Passing a NULL device array must be rejected with an error.
+// NOTE(review): "Shold" typo is in the test name (test identity) — left as-is.
+TEST_F(MvncTestsCommon, AvailableDevicesSholdReturnErrorIfArrayIsNULL) {
+ int act_devicesCount = 0;
+ ASSERT_ERROR(ncAvailableDevices(NULL, NC_MAX_DEVICES, &act_devicesCount));
+}
+
+// Passing a NULL count pointer must be rejected with an error.
+TEST_F(MvncTestsCommon, AvailableDevicesSholdReturnErrorIfCountPtrIsNULL) {
+ struct ncDeviceDescr_t act_devices[NC_MAX_DEVICES] = {};
+ ASSERT_ERROR(ncAvailableDevices(act_devices, NC_MAX_DEVICES, NULL));
+}
+
+// When both transports are present, the enumeration must report at least
+// one USB and one PCIe device.
+TEST_F(MvncTestsCommon, CanGetPCIeAndUSB) {
+ if (!(getAmountOfUSBDevices() && getAmountOfPCIeDevices()))
+ GTEST_SKIP_("USB and PCIe not available");
+
+ struct ncDeviceDescr_t act_devices[NC_MAX_DEVICES] = {};
+ int act_devicesCount = 0;
+ ASSERT_NO_ERROR(ncAvailableDevices(act_devices, NC_MAX_DEVICES, &act_devicesCount));
+
+ bool usb_device_found = false;
+ bool pcie_device_found = false;
+
+ // Classify each reported name by its transport marker.
+ for (int i = 0; i < act_devicesCount; ++i) {
+ if (isMyriadUSBDevice(act_devices[i].name)) {
+ usb_device_found = true;
+ } else if (isMyriadPCIeDevice(act_devices[i].name)) {
+ pcie_device_found = true;
+ }
+ }
+
+ EXPECT_TRUE(usb_device_found);
+ EXPECT_TRUE(pcie_device_found);
+}
+
+// A negative connect timeout is invalid and must be rejected.
+TEST_F(MvncTestsCommon, ShouldFailToSetNegativeTimeout) {
+ ASSERT_ERROR(ncSetDeviceConnectTimeout(-1));
+}
+
+//------------------------------------------------------------------------------
+// MvncTestsCommon Tests
+// PCIe + USB Tests
+//------------------------------------------------------------------------------
+
+/**
+ * @brief Test that USB and PCIe works at the same time. USB first
+ */
+TEST_F(MvncTestsCommon, OpenUSBThenPCIEAndClose) {
+ if (getAmountOfPCIeDevices() == 0)
+ GTEST_SKIP() << "PCIe devices not found";
+ if (getAmountOfUSBDevices() == 0)
+ GTEST_SKIP() << "USB devices not found";
+
+ ncDeviceHandle_t *deviceHandle_USB = nullptr;
+ ncDeviceHandle_t *deviceHandle_PCIe = nullptr;
+ std::string actDeviceName;
+ ncDeviceDescr_t deviceDesc = {};
+ deviceDesc.protocol = NC_USB;
+ deviceDesc.platform = NC_ANY_PLATFORM;
+
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_USB, deviceDesc, watchdogInterval, firmwarePath));
+
+ // Verify the opened device really is a USB device.
+ actDeviceName = deviceHandle_USB->private_data->dev_addr;
+ ASSERT_TRUE(actDeviceName.size());
+ ASSERT_TRUE(isMyriadUSBDevice(actDeviceName));
+
+ // Open PCIe device
+ deviceDesc.protocol = NC_PCIE;
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_PCIe, deviceDesc, watchdogInterval, firmwarePath));
+
+ actDeviceName = deviceHandle_PCIe->private_data->dev_addr;
+ ASSERT_TRUE(actDeviceName.size());
+ ASSERT_TRUE(isMyriadPCIeDevice(actDeviceName));
+
+ // Close all
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_PCIe));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_USB));
+}
+
+/**
+ * @brief Test that USB and PCIe works at the same time. PCIe first
+ */
+TEST_F(MvncTestsCommon, OpenPCIEThenUSBAndClose) {
+    if (getAmountOfPCIeDevices() == 0)
+        GTEST_SKIP() << "PCIe devices not found";
+    if (getAmountOfUSBDevices() == 0)
+        GTEST_SKIP() << "USB devices not found";
+
+    ncDeviceHandle_t *usbHandle = nullptr;
+    ncDeviceHandle_t *pcieHandle = nullptr;
+    ncDeviceDescr_t descr = {};
+    descr.protocol = NC_PCIE;
+    descr.platform = NC_ANY_PLATFORM;
+
+    // PCIe first: open and check the reported address really is PCIe.
+    ASSERT_NO_ERROR(ncDeviceOpen(&pcieHandle, descr,
+                                 watchdogInterval, firmwarePath));
+    std::string openedName = pcieHandle->private_data->dev_addr;
+    ASSERT_TRUE(openedName.size());
+    ASSERT_TRUE(isMyriadPCIeDevice(openedName));
+
+    // Then USB with the same descriptor, only the protocol switched.
+    descr.protocol = NC_USB;
+    ASSERT_NO_ERROR(ncDeviceOpen(&usbHandle, descr,
+                                 watchdogInterval, firmwarePath));
+    openedName = usbHandle->private_data->dev_addr;
+    ASSERT_TRUE(openedName.size());
+    ASSERT_TRUE(isMyriadUSBDevice(openedName));
+
+    // Both handles must close cleanly.
+    ASSERT_NO_ERROR(ncDeviceClose(&pcieHandle));
+    ASSERT_NO_ERROR(ncDeviceClose(&usbHandle));
+}
+
+//------------------------------------------------------------------------------
+// MvncOpenDevice Tests
+//------------------------------------------------------------------------------
+/**
+* @brief Open any device and close it
+*/
+TEST_P(MvncOpenDevice, OpenAndClose) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
+    ncDeviceDescr_t descr = {};
+    descr.protocol = _deviceProtocol;
+    descr.platform = NC_ANY_PLATFORM;
+
+    ncDeviceHandle_t* handle = nullptr;
+    ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr, watchdogInterval, firmwarePath));
+
+    // A successful open must populate the handle and its private data.
+    ASSERT_TRUE(handle != nullptr);
+    ASSERT_TRUE(handle->private_data != nullptr);
+    ASSERT_TRUE(handle->private_data->dev_addr_booted != nullptr);
+
+    const std::string bootedName = handle->private_data->dev_addr_booted;
+    ASSERT_TRUE(bootedName.size() > 0);
+    // The booted address must match the protocol the test was run for.
+    ASSERT_TRUE(isSameProtocolDevice(bootedName, _deviceProtocol));
+
+    ASSERT_NO_ERROR(ncDeviceClose(&handle));
+}
+
+/**
+ * @brief Check that all field of deviceHandle would be initialized
+ */
+TEST_P(MvncOpenDevice, AllHandleFieldsInitialized) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
+    ncDeviceDescr_t descr = {};
+    descr.protocol = _deviceProtocol;
+    descr.platform = NC_ANY_PLATFORM;
+
+    ncDeviceHandle_t* handle = nullptr;
+    ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr,
+                                 watchdogInterval, firmwarePath));
+    ASSERT_TRUE(handle != nullptr);
+
+    // Every pointer inside the private data must have been filled in by open.
+    devicePrivate_t* priv = handle->private_data;
+    ASSERT_TRUE(priv != nullptr);
+    ASSERT_TRUE(priv->dev_addr != nullptr);
+    ASSERT_TRUE(priv->dev_addr_booted != nullptr);
+    ASSERT_TRUE(priv->xlink != nullptr);
+
+    ASSERT_NO_ERROR(ncDeviceClose(&handle));
+}
+
+/**
+* @brief Try to open device twice. DeviceHandle shouldn't be overwritten
+* @details Expected behavior - ncDeviceOpen should warn that deviceHandle
+ * already has allocated device
+*/
+TEST_P(MvncOpenDevice, OpenTwiceSameHandler) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
+    ncDeviceDescr_t descr = {};
+    descr.protocol = _deviceProtocol;
+    descr.platform = NC_ANY_PLATFORM;
+
+    char firstName[MAX_DEV_NAME];
+    unsigned int firstLength = MAX_DEV_NAME;
+    char secondName[MAX_DEV_NAME];
+    unsigned int secondLength = MAX_DEV_NAME;
+
+    // First open: record the device name behind the handle.
+    ncDeviceHandle_t *handle = nullptr;
+    ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr, watchdogInterval, firmwarePath));
+    ASSERT_NO_ERROR(ncDeviceGetOption(handle, NC_RO_DEVICE_NAME,
+                                      firstName, &firstLength));
+
+    // Second open with the SAME handle: must keep pointing at the same device.
+    ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr, watchdogInterval, firmwarePath));
+    ASSERT_NO_ERROR(ncDeviceGetOption(handle, NC_RO_DEVICE_NAME,
+                                      secondName, &secondLength));
+
+    ASSERT_NO_ERROR(ncDeviceClose(&handle));
+    // Should be the same device
+    ASSERT_STREQ(firstName, secondName);
+}
+
+/**
+ * @brief Try to open device twice with different handlers. Second open should return error
+ * @reason #-18548
+ */
+ // Fixme Test only for one device
+TEST_P(MvncOpenDevice, DISABLED_OpenSameDeviceTwiceDifferentHandlers) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
+    ncDeviceDescr_t descr = {};
+    descr.protocol = _deviceProtocol;
+    descr.platform = NC_ANY_PLATFORM;
+
+    ncDeviceHandle_t *firstHandle = nullptr;
+    ASSERT_NO_ERROR(ncDeviceOpen(&firstHandle, descr,
+                                 watchdogInterval, firmwarePath));
+
+    // Till we don't have multiple device support, this function would try to open same device
+    ncDeviceHandle_t *secondHandle = nullptr;
+    ASSERT_ERROR(ncDeviceOpen(&secondHandle, descr,
+                              watchdogInterval, firmwarePath));
+
+    ASSERT_NO_ERROR(ncDeviceClose(&firstHandle));
+}
+
+
+/**
+ * @brief Open device twice one run after another. It should check, that link to device closed correctly
+ * @note Mostly this test important for PCIe and connect to booted option, as in that cases XLinkReset have another behavior
+ */
+TEST_P(MvncOpenDevice, OpenTwiceWithOneXLinkInitializion) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
+    ncDeviceDescr_t descr = {};
+    descr.protocol = _deviceProtocol;
+    descr.platform = NC_ANY_PLATFORM;
+
+    ncDeviceHandle_t *handle = nullptr;
+
+    // First open/close cycle.
+    ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr,
+                                 watchdogInterval, firmwarePath));
+    std::string openedName = handle->private_data->dev_addr;
+    ASSERT_TRUE(isSameProtocolDevice(openedName, _deviceProtocol));
+    ASSERT_NO_ERROR(ncDeviceClose(&handle));
+
+    // Second cycle must succeed with the same (already initialized) XLink.
+    ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr,
+                                 watchdogInterval, firmwarePath));
+    openedName = handle->private_data->dev_addr;
+    ASSERT_TRUE(isSameProtocolDevice(openedName, _deviceProtocol));
+    ASSERT_NO_ERROR(ncDeviceClose(&handle));
+}
+
+//------------------------------------------------------------------------------
+// MvncLoggingTests Tests
+//------------------------------------------------------------------------------
+TEST_P(MvncLoggingTests, ShouldNotPrintErrorMessagesIfCanNotOpenDevice) {
+ if (availableDevices_ == 0)
+ GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
+ setLogLevel(MVLOG_INFO);
+ ncDeviceHandle_t * deviceHandle = nullptr;
+
+ ASSERT_ERROR(ncDeviceOpen(&deviceHandle, _deviceDesc, watchdogInterval, firmwarePath));
+
+ std::string content(buff);
+ for (int i = MVLOG_WARN; i < MVLOG_LAST; i++) {
+ auto found = content.find(mvLogHeader[i]);
+ ASSERT_TRUE(found == std::string::npos);
+ }
+}
+
+//------------------------------------------------------------------------------
+// MvncGraphAllocations Tests
+//------------------------------------------------------------------------------
+/**
+ * @brief Allocate graph for one device
+ */
+TEST_P(MvncGraphAllocations, DISABLED_OneGraph) {
+    if (!blobLoaded) GTEST_SKIP_("Blob for test is not loaded\n");
+    openDevices(1, _deviceHandle, _bootedDevices);
+
+    // Create the graph handle.
+    const std::string graphName = "graph";
+    ASSERT_NO_ERROR(ncGraphCreate(graphName.c_str(), &_graphHandle[0]));
+    ASSERT_TRUE(_graphHandle[0] != nullptr);
+
+    // Allocate it on the single opened device; the blob buffer doubles as
+    // both graph payload and header (header length is Elf + blob header).
+    ASSERT_NO_ERROR(ncGraphAllocate(_deviceHandle[0], _graphHandle[0],
+                                    _blob.data(), _blob.size(),  // Blob
+                                    _blob.data(), sizeof(ElfN_Ehdr) + sizeof(blob_header_v2)));  // Header
+}
+
+/**
+ * @brief Allocate graphs for 2 device (serial)
+ */
+TEST_P(MvncGraphAllocations, DISABLED_AllocateGraphsOn2DevicesSerial) {
+    if (!blobLoaded)
+        GTEST_SKIP_("Blob for test is not loaded\n");
+    openDevices(2, _deviceHandle, _bootedDevices);
+
+    // Create one graph handle per booted device.
+    for (int index = 0; index < _bootedDevices; ++index) {
+        std::string graphName = "graph";
+        graphName += std::to_string(index);
+        ASSERT_NO_ERROR(ncGraphCreate(graphName.c_str(), &_graphHandle[index]));
+        ASSERT_TRUE(_graphHandle[index] != nullptr);
+    }
+
+    // Allocate graphs in serial mode.
+    // FIX: use the i-th device/graph pair; the previous code always passed
+    // _deviceHandle[0]/_graphHandle[0], so the second device was never
+    // exercised and the "2 devices" scenario was not actually tested.
+    ncStatus_t rc[MAX_DEVICES];
+    for (int i = 0; i < _bootedDevices; ++i) {
+        rc[i] = ncGraphAllocate(_deviceHandle[i], _graphHandle[i],
+                                _blob.data(), _blob.size(),  // Blob
+                                _blob.data(), sizeof(ElfN_Ehdr) + sizeof(blob_header_v2));  // Header
+    }
+
+    for (int i = 0; i < _bootedDevices; ++i) {
+        ASSERT_NO_ERROR(rc[i]);
+    }
+}
+
+/**
+* @brief Allocate graphs for 2 device (parallel)
+* @detail Open devices and then in parallel threads try to load graphs to it
+* The error easy appear, if USBLINK_TRANSFER_SIZE is (1024 * 1024 * 20)
+* @warning It's depend on USBLINK_TRANSFER_SIZE constant from UsbLinkPlatform.c file
+* @warning Need blob to use this tests
+*/
+TEST_P(MvncGraphAllocations, DISABLED_AllocateGraphsOn2DevicesParallel) {
+    if (!blobLoaded) GTEST_SKIP_("Blob for test is not loaded\n");
+    openDevices(2, _deviceHandle, _bootedDevices);
+
+    // Create one graph handle per booted device.
+    for (int index = 0; index < _bootedDevices; ++index) {
+        std::string graphName = "graph";
+        graphName += std::to_string(index);
+        ASSERT_NO_ERROR(ncGraphCreate(graphName.c_str(), &_graphHandle[index]));
+        ASSERT_TRUE(_graphHandle[index] != nullptr);
+    }
+
+    // Allocate graphs in parallel threads.
+    std::thread requests[MAX_DEVICES];
+    ncStatus_t rc[MAX_DEVICES];
+    for (int i = 0; i < _bootedDevices; ++i) {
+        requests[i] = std::thread([i, &rc, this]() {
+            // FIX: allocate on the i-th device/graph pair; the previous code
+            // made every thread target _deviceHandle[0]/_graphHandle[0], so
+            // parallel allocation across devices was never exercised.
+            rc[i] = ncGraphAllocate(_deviceHandle[i], _graphHandle[i],
+                                    _blob.data(), _blob.size(),  // Blob
+                                    _blob.data(), sizeof(ElfN_Ehdr) + sizeof(blob_header_v2));  // Header
+        });
+    }
+
+    for (int i = 0; i < _bootedDevices; ++i) {
+        requests[i].join();
+        ASSERT_NO_ERROR(rc[i]);
+    }
+}
+
+//------------------------------------------------------------------------------
+// MvncCloseDevice Tests
+//------------------------------------------------------------------------------
+/**
+* @brief Correct closing if handle is empty
+*/
+TEST_F(MvncCloseDevice, EmptyDeviceHandler) {
+    // Closing a null handle must be a harmless no-op, not a crash.
+    ncDeviceHandle_t *emptyHandle = nullptr;
+    ASSERT_NO_ERROR(ncDeviceClose(&emptyHandle));
+}
+
+/**
+* @brief Correct closing if some handler fields is null
+*/
+TEST_F(MvncCloseDevice, EmptyFieldsOfDeviceHandle) {
+    // FIX: initialize to nullptr. Previously the pointer was left
+    // uninitialized, so if either calloc below failed it was read
+    // uninitialized by ncDeviceClose (undefined behavior).
+    ncDeviceHandle_t *deviceHandlePtr = nullptr;
+
+    auto dH = std::unique_ptr<ncDeviceHandle_t, decltype(std::free)*>(
+        (ncDeviceHandle_t*)calloc(1, sizeof(ncDeviceHandle_t)), std::free);
+
+    auto d = std::unique_ptr<_devicePrivate_t, decltype(std::free)*>(
+        (_devicePrivate_t*)calloc(1, sizeof(_devicePrivate_t)), std::free);
+
+    if (dH.get() && d.get()) {
+        // Build a handle whose address fields are deliberately null.
+        dH->private_data = d.get();
+        d->dev_addr = nullptr;
+        d->dev_addr_booted = nullptr;
+        d->device_mon_stream_id = INVALID_LINK_ID;
+        d->graph_monitor_stream_id = INVALID_LINK_ID;
+        d->wd_interval = watchdogInterval;
+        deviceHandlePtr = dH.get();
+    }
+
+    // Such a handle must be rejected as invalid, not dereferenced.
+    ASSERT_EQ(ncDeviceClose(&deviceHandlePtr), NC_INVALID_PARAMETERS);
+}
+
+//------------------------------------------------------------------------------
+// MvncInference Tests
+//------------------------------------------------------------------------------
+// MvncInference reuses the MvncGraphAllocations fixture (device handles,
+// graph handles, and the loaded blob) for end-to-end inference tests.
+using MvncInference = MvncGraphAllocations;
+
+// Full pipeline smoke test: allocate a graph, create input/output FIFOs,
+// queue one inference and read the result back.
+TEST_P(MvncInference, DISABLED_DoOneIterationOfInference) {
+    if (!blobLoaded) GTEST_SKIP_("Blob for test is not loaded\n");
+    openDevices(1, _deviceHandle, _bootedDevices);
+
+    std::string graphName = "graph";
+    ASSERT_NO_ERROR(ncGraphCreate(graphName.c_str(), &_graphHandle[0]));
+    // FIX: check the handle value itself. The previous
+    // `ASSERT_TRUE(&_graphHandle[0] != nullptr)` took the address of the
+    // array slot, which is never null, so the check could not fail.
+    ASSERT_TRUE(_graphHandle[0] != nullptr);
+
+    ASSERT_NO_ERROR(ncGraphAllocate(_deviceHandle[0], _graphHandle[0],
+                                    _blob.data(), _blob.size(),  // Blob
+                                    _blob.data(), sizeof(ElfN_Ehdr) + sizeof(blob_header_v2)));
+
+    // Query input/output counts, then the tensor descriptors.
+    unsigned int dataLength = sizeof(int);
+
+    int numInputs = 0;
+    ASSERT_NO_ERROR(ncGraphGetOption(_graphHandle[0], NC_RO_GRAPH_INPUT_COUNT, &numInputs, &dataLength));
+
+    int numOutputs = 0;
+    ASSERT_NO_ERROR(ncGraphGetOption(_graphHandle[0], NC_RO_GRAPH_OUTPUT_COUNT, &numOutputs, &dataLength));
+
+    dataLength = sizeof(ncTensorDescriptor_t);
+
+    ncTensorDescriptor_t inputDesc = {};
+    ASSERT_NO_ERROR(ncGraphGetOption(_graphHandle[0], NC_RO_GRAPH_INPUT_TENSOR_DESCRIPTORS, &inputDesc,
+                                     &dataLength));
+
+    ncTensorDescriptor_t outputDesc = {};
+    ASSERT_NO_ERROR(ncGraphGetOption(_graphHandle[0], NC_RO_GRAPH_OUTPUT_TENSOR_DESCRIPTORS, &outputDesc,
+                                     &dataLength));
+
+    // One host-writable input FIFO and one host-readable output FIFO.
+    unsigned int fifo_elements = 4;
+
+    ncFifoHandle_t *inputFifoHandle = nullptr;
+    ASSERT_NO_ERROR(ncFifoCreate("input", NC_FIFO_HOST_WO, &inputFifoHandle));
+    ASSERT_NO_ERROR(ncFifoAllocate(inputFifoHandle, _deviceHandle[0], &inputDesc, fifo_elements));
+
+    ncFifoHandle_t *outputFifoHandle = nullptr;
+    ASSERT_NO_ERROR(ncFifoCreate("output", NC_FIFO_HOST_RO, &outputFifoHandle));
+    ASSERT_NO_ERROR(ncFifoAllocate(outputFifoHandle, _deviceHandle[0], &outputDesc, fifo_elements));
+
+    // Queue one (uninitialized) input element and read the inference result.
+    // NOTE(review): the buffers leak if an ASSERT fires before delete[];
+    // acceptable here because a failed ASSERT aborts the test anyway.
+    uint8_t *input_data = new uint8_t[inputDesc.totalSize];
+    uint8_t *result_data = new uint8_t[outputDesc.totalSize];
+    ASSERT_NO_ERROR(ncGraphQueueInferenceWithFifoElem(_graphHandle[0],
+                                                      inputFifoHandle, outputFifoHandle,
+                                                      input_data, &inputDesc.totalSize, nullptr));
+
+    void *userParam = nullptr;
+    ASSERT_NO_ERROR(ncFifoReadElem(outputFifoHandle, result_data, &outputDesc.totalSize, &userParam));
+
+    delete[] input_data;
+    delete[] result_data;
+
+    // Tear down FIFOs, graph, and the device.
+    ASSERT_NO_ERROR(ncFifoDestroy(&inputFifoHandle));
+    ASSERT_NO_ERROR(ncFifoDestroy(&outputFifoHandle));
+    ASSERT_NO_ERROR(ncGraphDestroy(&_graphHandle[0]));
+    ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandle[0]));
+}
+
+
+// Instantiate each parameterized suite once per supported Myriad protocol
+// (see myriadProtocols); PrintToStringParamName yields readable test names.
+INSTANTIATE_TEST_CASE_P(MvncTestsCommon,
+                        MvncOpenDevice,
+                        ::testing::ValuesIn(myriadProtocols),
+                        PrintToStringParamName());
+
+INSTANTIATE_TEST_CASE_P(MvncTestsCommon,
+                        MvncLoggingTests,
+                        ::testing::ValuesIn(myriadProtocols),
+                        PrintToStringParamName());
+
+INSTANTIATE_TEST_CASE_P(MvncTestsCommon,
+                        MvncGraphAllocations,
+                        ::testing::ValuesIn(myriadProtocols),
+                        PrintToStringParamName());
+
+INSTANTIATE_TEST_CASE_P(MvncTestsCommon,
+                        MvncInference,
+                        ::testing::ValuesIn(myriadProtocols),
+                        PrintToStringParamName());
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <thread>
+
+#include "mvnc.h"
+#include "mvnc_test_helper.h"
+#include "mvnc_usb_test_cases.h"
+#include "ncPrivateTypes.h"
+
+//------------------------------------------------------------------------------
+// MvncOpenUSBDevice Tests
+//------------------------------------------------------------------------------
+/**
+* @brief Open any device with custom firmware path as ncDeviceOpen argument
+*/
+
+// A zero connect timeout must make open fail fast; restoring a sane timeout
+// must make the same open succeed again.
+TEST_F(MvncOpenUSBDevice, ShouldOpenDeviceAfterChangeConnectTimeoutFromZero) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    ncDeviceDescr_t descr = {};
+    descr.protocol = NC_ANY_PROTOCOL;
+    descr.platform = NC_ANY_PLATFORM;
+    ncDeviceHandle_t *handle = nullptr;
+
+    // With a zero timeout the open must fail immediately.
+    ASSERT_NO_ERROR(ncSetDeviceConnectTimeout(0));
+    ASSERT_ERROR(ncDeviceOpen(&handle, descr, watchdogInterval, firmwarePath));
+    std::this_thread::sleep_for(3_sec);
+    ASSERT_NO_ERROR(ncDeviceResetAll());
+
+    // With a generous timeout the open must succeed.
+    ASSERT_NO_ERROR(ncSetDeviceConnectTimeout(30));
+    ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr, watchdogInterval, firmwarePath));
+    ASSERT_NO_ERROR(ncDeviceClose(&handle));
+
+    ASSERT_NO_ERROR(ncDeviceResetAll());
+}
+
+
+// Opening with the fixture's custom firmware directory (firmwarePath) must
+// succeed; verifies the path argument of ncDeviceOpen is honored.
+TEST_F(MvncOpenUSBDevice, WithCustomFirmware) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    // Use custom firmware dir path as parameter for ncDeviceOpen
+    ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+    ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_));
+}
+
+/**
+* @brief Open all available devices and close them
+*/
+TEST_F(MvncOpenUSBDevice, AllAvailableDevices) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    ncDeviceHandle_t *handles[MAX_DEVICES] = {nullptr};
+
+    // Open every detected device, then close them all.
+    for (int i = 0; i < availableDevices_; ++i) {
+        ASSERT_NO_ERROR(ncDeviceOpen(&handles[i], deviceDesc_, watchdogInterval, firmwarePath));
+    }
+    for (int i = 0; i < availableDevices_; ++i) {
+        ASSERT_NO_ERROR(ncDeviceClose(&handles[i]));
+    }
+}
+
+/**
+* @brief Open all available devices in parallel threads and close them
+*/
+TEST_F(MvncOpenUSBDevice, AllAvailableMultiThreads) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    std::thread workers[MAX_DEVICES];
+    ncDeviceHandle_t *handles[MAX_DEVICES] = {nullptr};
+    ncStatus_t openStatus[MAX_DEVICES];
+
+    // Each thread opens one device concurrently.
+    for (int i = 0; i < availableDevices_; ++i) {
+        workers[i] = std::thread([i, &openStatus, &handles, this]() {
+            openStatus[i] = ncDeviceOpen(&handles[i], deviceDesc_, watchdogInterval, firmwarePath);
+        });
+    }
+
+    // Every open must have succeeded.
+    for (int i = 0; i < availableDevices_; ++i) {
+        workers[i].join();
+        ASSERT_NO_ERROR(openStatus[i]);
+    }
+
+    for (int i = 0; i < availableDevices_; ++i) {
+        ASSERT_NO_ERROR(ncDeviceClose(&handles[i]));
+    }
+}
+
+/**
+* @brief Open any device with invalid firmware path
+*/
+TEST_F(MvncOpenUSBDevice, WithInvalidFirmwarePath) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    // A firmware directory that does not exist must make the open fail
+    // and leave the handle untouched (null).
+    const char invalidPath[MAX_PATH] = "./InvalidPath/";
+    ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, invalidPath));
+    ASSERT_EQ(deviceHandle_, nullptr);
+}
+
+// Opening a device by its exact enumerated name must yield that same device.
+TEST_F(MvncOpenUSBDevice, OpenAvailableDeviceByName) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    char dev_addr_open[NC_MAX_NAME_SIZE];
+    unsigned int data_length = NC_MAX_NAME_SIZE;
+
+    auto availableDevices = getDevicesList();
+    ASSERT_TRUE(availableDevices.size());
+
+    // FIX: strncpy does not null-terminate when the source fills the whole
+    // buffer; copy one byte less and terminate explicitly so the later
+    // strncmp cannot read past the array.
+    strncpy(deviceDesc_.name, availableDevices[0].c_str(), NC_MAX_NAME_SIZE - 1);
+    deviceDesc_.name[NC_MAX_NAME_SIZE - 1] = '\0';
+
+    ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+    ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle_, NC_RO_DEVICE_NAME,
+                                      dev_addr_open, &data_length));
+
+    // The opened device must report exactly the requested name.
+    ASSERT_TRUE(strncmp(dev_addr_open, deviceDesc_.name, NC_MAX_NAME_SIZE) == 0);
+    ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_));
+}
+
+// Requesting a device by a name that matches nothing must fail the open.
+TEST_F(MvncOpenUSBDevice, ErrorWhenWrongDeviceName) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    char badName[] = "BadName";
+    strncpy(deviceDesc_.name, badName, NC_MAX_NAME_SIZE);
+
+    // Sanity check: real devices do exist, only the name is wrong.
+    auto availableDevices = getDevicesList();
+    ASSERT_TRUE(availableDevices.size());
+
+    ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+}
+
+// Opening twice by name with the same handle must keep the same device.
+TEST_F(MvncOpenUSBDevice, OpenTwiceSameHandlerByName) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    char dev_addr_first_open[MAX_DEV_NAME];
+    unsigned int data_length_first = MAX_DEV_NAME;
+
+    char dev_addr_second_open[MAX_DEV_NAME];
+    unsigned int data_length_second = MAX_DEV_NAME;
+
+    auto availableDevices = getDevicesList();
+    ASSERT_TRUE(availableDevices.size());
+
+    // FIX: strncpy does not null-terminate when the source fills the whole
+    // buffer; copy one byte less and terminate explicitly.
+    strncpy(deviceDesc_.name, availableDevices[0].c_str(), NC_MAX_NAME_SIZE - 1);
+    deviceDesc_.name[NC_MAX_NAME_SIZE - 1] = '\0';
+
+    // First open, get device name
+    ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+    ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle_, NC_RO_DEVICE_NAME,
+                                      dev_addr_first_open, &data_length_first));
+
+    // Second open, get device name
+    ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+    ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle_, NC_RO_DEVICE_NAME,
+                                      dev_addr_second_open, &data_length_second));
+
+    ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_));
+    // Should be the same device
+    ASSERT_STREQ(dev_addr_first_open, dev_addr_second_open);
+}
+
+// Requesting a device by name together with a platform that contradicts the
+// name must be rejected.
+TEST_F(MvncOpenUSBDevice, CheckErrorWhenPlatformConflictWithName) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    auto availableDevices = getDevicesList();
+    ASSERT_TRUE(availableDevices.size());
+
+    // Pick the platform that is NOT the one the first device actually has.
+    ncDevicePlatform_t wrongPlatform = NC_ANY_PLATFORM;
+    if(isMyriadXUSBDevice(availableDevices[0])) {
+        wrongPlatform = NC_MYRIAD_2;
+    } else {
+        wrongPlatform = NC_MYRIAD_X;
+    }
+
+    // FIX: strncpy does not null-terminate when the source fills the whole
+    // buffer; copy one byte less and terminate explicitly.
+    strncpy(deviceDesc_.name, availableDevices[0].c_str(), NC_MAX_NAME_SIZE - 1);
+    deviceDesc_.name[NC_MAX_NAME_SIZE - 1] = '\0';
+    deviceDesc_.platform = wrongPlatform;
+
+    ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+}
+
+//------------------------------------------------------------------------------
+// MvncCloseUSBDevice Tests
+//------------------------------------------------------------------------------
+#if (!(defined(_WIN32) || defined(_WIN64)))
+// After ncDeviceClose the USB device must immediately reappear in the
+// unbooted state (i.e. the XLink reset completed before close returned).
+TEST_F(MvncCloseUSBDevice, USBDeviceWillBeAvailableRightAfterClosing) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    ASSERT_NO_ERROR(ncDeviceOpen(
+        &deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+    ASSERT_TRUE(deviceHandle_);
+
+    // Remember the address of the device we just opened.
+    deviceDesc_t searchDescr = {
+        .protocol = X_LINK_USB_VSC,
+        .platform = X_LINK_ANY_PLATFORM
+    };
+    strcpy(deviceDesc_.name, deviceHandle_->private_data->dev_addr);
+
+    ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_));
+
+    // It must be discoverable again, unbooted, right away.
+    deviceDesc_t rediscovered = {};
+    XLinkError_t status = XLinkFindFirstSuitableDevice(
+        X_LINK_UNBOOTED, searchDescr, &rediscovered);
+    ASSERT_EQ(X_LINK_SUCCESS, status);
+}
+#endif
+
+//------------------------------------------------------------------------------
+// MvncDevicePlatform Tests
+//------------------------------------------------------------------------------
+/**
+* @brief Open specified device and close it
+*/
+TEST_P(MvncDevicePlatform, OpenAndClose) {
+    // Needs one device of each platform to make the platform check meaningful.
+    if (available_myriad2_ == 0 || available_myriadX_ == 0)
+        GTEST_SKIP();
+
+    ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+
+    // The opened device must belong to the platform under test.
+    char openedName[MAX_DEV_NAME];
+    unsigned int nameLength = MAX_DEV_NAME;
+    ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle_, NC_RO_DEVICE_NAME, openedName, &nameLength));
+    EXPECT_TRUE(isSamePlatformUSBDevice(openedName, devicePlatform_));
+
+    ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_));
+}
+
+// Instantiate the platform suite once per supported Myriad platform
+// (see myriadPlatforms); PrintToStringParamName yields readable test names.
+INSTANTIATE_TEST_CASE_P(MvncTestsPlatform,
+                        MvncDevicePlatform,
+                        ::testing::ValuesIn(myriadPlatforms),
+                        PrintToStringParamName());
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ncCommPrivate.h"
+#include "mvnc_test_helper.h"
+
+#include <gtest/gtest.h>
+#include <fstream>
+
+extern "C" {
+#include "XLinkStringUtils.h"
+}
+
+// Fixture for firmware-path resolution tests: each test creates a stub
+// .mvcmd file under tmpDir; TearDown deletes it again.
+class MvncUtilsTest : public ::testing::Test {
+public:
+    void TearDown() override {
+        // Remove the firmware stub created by the test body.
+        std::remove(mvcmdExpectedPath.c_str());
+    }
+
+protected:
+    // Full path of the .mvcmd file the test expects getFirmwarePath() to pick.
+    std::string mvcmdExpectedPath = "";
+    // FIXME: seems it is not going to work on Windows
+    const std::string tmpDir = "/tmp";
+};
+
+// When the universal usb-ma2x8x firmware is absent, the platform-specific
+// usb-ma248x firmware must be selected.
+TEST_F(MvncUtilsTest, CanGetSpecialFWIfUniversalIsNotPresent) {
+    mvcmdExpectedPath = tmpDir + "/usb-ma248x.mvcmd";
+
+    // Create an empty platform-specific firmware stub on disk.
+    std::ofstream mvcmd;
+    mvcmd.open(mvcmdExpectedPath, std::ios::out);
+    // FIX: close the stream explicitly so the file is flushed and not held
+    // open while getFirmwarePath() scans the directory (and before TearDown
+    // removes it).
+    mvcmd.close();
+
+    char mvcmdFilePath[MAX_PATH] = "";
+    mv_strcpy(mvcmdFilePath, MAX_PATH, tmpDir.c_str());
+
+    // A MyriadX USB device descriptor to resolve firmware for.
+    deviceDesc_t dummyDevDesc2480;
+    strcpy(dummyDevDesc2480.name, "0-ma2480");
+    dummyDevDesc2480.protocol = X_LINK_USB_VSC;
+    dummyDevDesc2480.platform = X_LINK_MYRIAD_X;
+
+    ASSERT_EQ(NC_OK, getFirmwarePath(mvcmdFilePath, MAX_PATH, dummyDevDesc2480));
+    ASSERT_STRCASEEQ(mvcmdExpectedPath.c_str(), mvcmdFilePath);
+}
+
+// When the universal usb-ma2x8x firmware exists, it must be preferred.
+TEST_F(MvncUtilsTest, CanGetUniversalFWIfItExists) {
+    mvcmdExpectedPath = tmpDir + "/usb-ma2x8x.mvcmd";
+
+    // Create an empty universal firmware stub on disk.
+    std::ofstream mvcmd;
+    mvcmd.open(mvcmdExpectedPath, std::ios::out);
+    // FIX: close the stream explicitly so the file is flushed and not held
+    // open while getFirmwarePath() scans the directory (and before TearDown
+    // removes it).
+    mvcmd.close();
+
+    char mvcmdFilePath[MAX_PATH] = "";
+    mv_strcpy(mvcmdFilePath, MAX_PATH, tmpDir.c_str());
+
+    // A MyriadX USB device descriptor to resolve firmware for.
+    deviceDesc_t dummyDevDesc2480;
+    strcpy(dummyDevDesc2480.name, "0-ma2480");
+    dummyDevDesc2480.protocol = X_LINK_USB_VSC;
+    dummyDevDesc2480.platform = X_LINK_MYRIAD_X;
+
+    ASSERT_EQ(NC_OK, getFirmwarePath(mvcmdFilePath, MAX_PATH, dummyDevDesc2480));
+    ASSERT_STRCASEEQ(mvcmdExpectedPath.c_str(), mvcmdFilePath);
+}
--- /dev/null
+# Build the XLink test suite.
+add_subdirectory(XLink)
--- /dev/null
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set(TARGET_NAME "XLinkTests")
+
+# Require C++11 instead of merely preferring it: without
+# CMAKE_CXX_STANDARD_REQUIRED, CMake silently falls back to an older
+# standard when the compiler does not support C++11.
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+include(${XLINK_DIR}/XLink.cmake)
+
+# NOTE(review): globbing means newly added test sources are only picked up
+# after a re-configure; prefer an explicit source list if the suite grows.
+file(GLOB_RECURSE XLINK_TESTS_SOURCES "*.cpp")
+
+add_executable(${TARGET_NAME} ${XLINK_TESTS_SOURCES})
+
+target_include_directories(${TARGET_NAME}
+    PRIVATE
+        helpers
+        cases
+        ${IE_MAIN_SOURCE_DIR}/tests_new/gtest/googletest/include
+        ${IE_MAIN_SOURCE_DIR}/tests_new/gtest/googletest/
+        ${XLINK_INCLUDE}
+        ${XLINK_PLATFORM_INCLUDE})
+
+target_compile_definitions(${TARGET_NAME}
+    PRIVATE
+        __PC__)
+
+target_link_libraries(${TARGET_NAME}
+    PRIVATE
+        XLink gtest gtest_main)
+
+set_target_properties(${TARGET_NAME} PROPERTIES
+    POSITION_INDEPENDENT_CODE TRUE
+    COMPILE_PDB_NAME ${TARGET_NAME})
+
+# Firmware must be staged before the tests can boot devices.
+add_dependencies(${TARGET_NAME} vpu_copy_firmware)
--- /dev/null
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "XLink_common_cases.hpp"
+
+#include <thread>
+
+//------------------------------------------------------------------------------
+// XLinkNullPtrTests
+//------------------------------------------------------------------------------
+
+// XLinkInitialize must reject a null global-handler argument.
+TEST_F(XLinkNullPtrTests, XLinkInitialize) {
+    const XLinkError_t status = XLinkInitialize(nullptr);
+    ASSERT_EQ(status, X_LINK_ERROR);
+}
+
+// XLinkConnect must reject a null handler argument.
+TEST_F(XLinkNullPtrTests, XLinkConnect) {
+    const XLinkError_t status = XLinkConnect(nullptr);
+    ASSERT_EQ(status, X_LINK_ERROR);
+}
+
+// Stream open with a null name and close of stream id 0 must both fail.
+TEST_F(XLinkNullPtrTests, XLinkOpenAndCloseStream) {
+    ASSERT_EQ(X_LINK_ERROR, XLinkOpenStream(0, nullptr, 0));
+    ASSERT_EQ(X_LINK_ERROR, XLinkCloseStream(0));
+}
+
+// Both device-search entry points must fail on a null output pointer.
+TEST_F(XLinkNullPtrTests, XLinkFindDevice) {
+    ASSERT_EQ(X_LINK_ERROR,
+              XLinkFindFirstSuitableDevice(X_LINK_ANY_STATE, {}, nullptr));
+    ASSERT_EQ(X_LINK_ERROR,
+              XLinkFindAllSuitableDevices(X_LINK_ANY_STATE, {}, nullptr, -1, nullptr));
+}
+
+// XLinkWriteData must reject a null data buffer.
+TEST_F(XLinkNullPtrTests, XLinkWriteData) {
+    const XLinkError_t status = XLinkWriteData(0, nullptr, 0);
+    ASSERT_EQ(status, X_LINK_ERROR);
+}
+
+//------------------------------------------------------------------------------
+// XLinkBootTests
+//------------------------------------------------------------------------------
+
+// Repeatedly boot, connect, and close one device of the tested protocol.
+TEST_P(XLinkBootTests, StressTestBootToOpenAndCloseDevice) {
+    if (getCountSpecificDevices(X_LINK_ANY_STATE, _protocol) == 0) {
+        GTEST_SKIP();
+    }
+
+    deviceDesc_t requirements = {};
+    requirements.protocol = _protocol;
+    requirements.platform = X_LINK_ANY_PLATFORM;
+
+    // Pick any unbooted device matching the protocol under test.
+    deviceDesc_t unbootedDevice = {};
+    ASSERT_EQ(X_LINK_SUCCESS,
+              XLinkFindFirstSuitableDevice(X_LINK_UNBOOTED, requirements, &unbootedDevice));
+
+    std::string firmwarePath;
+    ASSERT_NO_THROW(firmwarePath = getMyriadFirmwarePath(unbootedDevice));
+
+    for (int i = 0; i < 10; ++i) {
+        printf("Boot device. Iteration: %d\n", i);
+        ASSERT_EQ(X_LINK_SUCCESS, XLinkBoot(&unbootedDevice, firmwarePath.c_str()));
+        // FIXME: need to find a way to avoid this sleep
+        std::this_thread::sleep_for(kBootTimeoutSec);
+
+        // The device must now be discoverable in the booted state.
+        deviceDesc_t bootedDevice = {};
+        ASSERT_EQ(X_LINK_SUCCESS,
+                  XLinkFindFirstSuitableDevice(X_LINK_BOOTED, requirements, &bootedDevice));
+
+        XLinkHandler_t handler = {0};
+        connectToDevice(bootedDevice, &handler);
+        closeDevice(&handler);
+    }
+}
+
+//------------------------------------------------------------------------------
+// XLinkConnectUSBTests
+//------------------------------------------------------------------------------
+
+// Boot any device of the tested protocol, connect to it, and close it.
+TEST_P(XLinkConnectTests, ConnectToDevice) {
+    if (getCountSpecificDevices(X_LINK_UNBOOTED, _protocol) == 0) {
+        GTEST_SKIP();
+    }
+
+    deviceDesc_t requirements = {};
+    requirements.protocol = _protocol;
+    deviceDesc_t bootedDevice = {};
+    bootDevice(requirements, bootedDevice);
+
+    XLinkHandler_t handler = {0};
+    handler.protocol = bootedDevice.protocol;
+    handler.devicePath = bootedDevice.name;
+
+    ASSERT_EQ(X_LINK_SUCCESS, XLinkConnect(&handler));
+    std::this_thread::sleep_for(std::chrono::seconds(2));
+
+    closeDevice(&handler);
+}
+
+//------------------------------------------------------------------------------
+// XLinkFindFirstSuitableDeviceTests
+//------------------------------------------------------------------------------
+
+// Find-first must report a plausible name and a protocol/platform that match
+// the requested ones.
+TEST_P(XLinkFindFirstSuitableDevicePlatformTests, ReturnCorrectAvailableDeviceName) {
+    if (getCountSpecificDevices(X_LINK_ANY_STATE, _protocol, _platform) == 0) {
+        GTEST_SKIP();
+    }
+
+    deviceDesc_t deviceDesc = {};
+    deviceDesc_t in_deviceDesc = {};
+    in_deviceDesc.protocol = _protocol;
+    in_deviceDesc.platform = _platform;
+
+    ASSERT_EQ(X_LINK_SUCCESS,
+              XLinkFindFirstSuitableDevice(X_LINK_UNBOOTED, in_deviceDesc, &deviceDesc));
+    ASSERT_TRUE(strlen(deviceDesc.name) > 2);
+    ASSERT_EQ(deviceDesc.protocol, in_deviceDesc.protocol);
+
+    if(_platform == X_LINK_ANY_PLATFORM) {
+        // A concrete platform must be reported for the found device.
+        EXPECT_NE(deviceDesc.platform, X_LINK_ANY_PLATFORM);
+    } else {
+        EXPECT_EQ(deviceDesc.platform, _platform);
+    }
+
+    // FIX: the kUSBMyriad2/kUSBMyriadX platform suffixes are part of *USB*
+    // device names, so this check must run when the protocol IS USB. The
+    // previous `!=` applied it to non-USB (PCIe) names, which do not carry
+    // these suffixes. Mirrors ReturnCorrectBootedDeviceName, which checks
+    // the suffixes disappear from USB names after boot.
+    if(_protocol == X_LINK_USB_VSC) {
+        std::string deviceName(deviceDesc.name);
+        switch (_platform) {
+            case X_LINK_MYRIAD_2: {
+                EXPECT_TRUE(deviceName.find(kUSBMyriad2) != std::string::npos);
+                break;
+            }
+            case X_LINK_MYRIAD_X: {
+                EXPECT_TRUE(deviceName.find(kUSBMyriadX) != std::string::npos);
+                break;
+            }
+            default:
+                break;
+        }
+    }
+}
+
+// A device found by protocol must also be findable by its exact name.
+TEST_P(XLinkFindFirstSuitableDeviceTests, CanFindDeviceByName) {
+    if (getCountSpecificDevices(X_LINK_ANY_STATE, _protocol) == 0) {
+        GTEST_SKIP();
+    }
+
+    // Discover any device of the protocol under test.
+    deviceDesc_t requirements = {};
+    requirements.protocol = _protocol;
+    deviceDesc_t firstFound = {};
+    ASSERT_EQ(X_LINK_SUCCESS, XLinkFindFirstSuitableDevice(
+        X_LINK_ANY_STATE, requirements, &firstFound));
+
+    // Search again, this time constrained to that exact name.
+    deviceDesc_t byNameRequirements = {};
+    byNameRequirements.protocol = _protocol;
+    strcpy(byNameRequirements.name, firstFound.name);
+
+    deviceDesc_t foundByName = {};
+    ASSERT_EQ(X_LINK_SUCCESS, XLinkFindFirstSuitableDevice(
+        X_LINK_ANY_STATE, byNameRequirements, &foundByName));
+
+    // Both searches must resolve to the same device.
+    ASSERT_TRUE(strcmp(firstFound.name, foundByName.name) == 0);
+}
+
+/**
+ * This is temporary test.
+ * For now it's not clear how to tests multiple device as for now we don't have bench like this
+ */
+TEST_P(XLinkFindFirstSuitableDeviceTests, OnSecondIndexDeviceWillBeNotFound) {
+    // Only meaningful when exactly one device of this protocol is attached.
+    auto availableDevices = getCountSpecificDevices(X_LINK_ANY_STATE, _protocol);
+    if (availableDevices != 1) {
+        GTEST_SKIP();
+    }
+
+    deviceDesc_t requirements = {};
+    requirements.protocol = _protocol;
+
+    // With one device present, index 1 must yield "not found".
+    const int index = 1;
+    deviceDesc_t found = {};
+    ASSERT_EQ(X_LINK_DEVICE_NOT_FOUND, findDeviceOnIndex(
+        index, X_LINK_ANY_STATE, requirements, &found));
+}
+
+// After booting a device, the booted-state search must report the same
+// device, and booted USB names must no longer carry a platform suffix.
+TEST_P(XLinkFindFirstSuitableDeviceTests, ReturnCorrectBootedDeviceName) {
+    if (getCountSpecificDevices(X_LINK_ANY_STATE, _protocol) == 0) {
+        GTEST_SKIP();
+    }
+
+    // Boot any device of the protocol under test.
+    deviceDesc_t toBoot = {};
+    toBoot.protocol = _protocol;
+    deviceDesc_t bootedDevice = {};
+    bootDevice(toBoot, bootedDevice);
+
+    deviceDesc_t requirements = {};
+    requirements.protocol = _protocol;
+    requirements.platform = X_LINK_ANY_PLATFORM;
+
+    deviceDesc_t foundDeviceDesc = {};
+    EXPECT_EQ(X_LINK_SUCCESS,
+              XLinkFindFirstSuitableDevice(X_LINK_BOOTED, requirements, &foundDeviceDesc));
+
+    EXPECT_TRUE(strcmp(bootedDevice.name, foundDeviceDesc.name) == 0);
+    EXPECT_EQ(foundDeviceDesc.protocol, _protocol);
+
+    if(_protocol == X_LINK_USB_VSC) {
+        // Booted USB devices drop the ma2450/ma2480 platform suffix.
+        std::string foundDeviceName(foundDeviceDesc.name);
+        EXPECT_TRUE(foundDeviceName.find(kUSBMyriad2) == std::string::npos);
+        EXPECT_TRUE(foundDeviceName.find(kUSBMyriadX) == std::string::npos);
+    }
+
+    connectAndCloseDevice(bootedDevice);
+}
+
+//------------------------------------------------------------------------------
+// XLinkFindAllSuitableDevicesTests
+//------------------------------------------------------------------------------
+
+// With at least two unbooted devices present, XLinkFindAllSuitableDevices with
+// X_LINK_ANY_PROTOCOL must report every unbooted device, and the per-platform
+// counts (Myriad2 + MyriadX) must add up to the total.
+TEST_F(XLinkFindAllSuitableDevicesTests, CanFindMoreThenTwoDeviceAnyState_USB_PCIE) {
+ if (getCountSpecificDevices(X_LINK_UNBOOTED) < 2) {
+ GTEST_SKIP();
+ }
+
+ deviceDesc_t in_deviceDesc = {};
+ deviceDesc_t deviceDescArray[XLINK_MAX_DEVICES] = {{}};
+
+ in_deviceDesc.protocol = X_LINK_ANY_PROTOCOL;
+ unsigned int numOfFoundDevices = 0;
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkFindAllSuitableDevices(
+ X_LINK_ANY_STATE, in_deviceDesc, deviceDescArray,
+ XLINK_MAX_DEVICES, &numOfFoundDevices));
+
+ // NOTE(review): unsigned count compared with int helper result — benign in
+ // gtest's EXPECT/ASSERT comparison helpers, but may warn on some compilers.
+ ASSERT_EQ(numOfFoundDevices, getCountSpecificDevices(X_LINK_UNBOOTED));
+ ASSERT_EQ(numOfFoundDevices,
+ getCountSpecificDevices(X_LINK_UNBOOTED, X_LINK_ANY_PROTOCOL, X_LINK_MYRIAD_2) +
+ getCountSpecificDevices(X_LINK_UNBOOTED, X_LINK_ANY_PROTOCOL, X_LINK_MYRIAD_X));
+}
+
+// Boots one of at least two devices, then checks that an X_LINK_ANY_STATE
+// search sees both the booted and the remaining unbooted devices.
+TEST_F(XLinkFindAllSuitableDevicesTests, CanFindTwoDeviceDifferentState_USB_PCIE) {
+ if (getCountSpecificDevices(X_LINK_UNBOOTED) < 2) {
+ GTEST_SKIP();
+ }
+
+ deviceDesc_t in_deviceDesc = {};
+ in_deviceDesc.protocol = X_LINK_ANY_PROTOCOL;
+
+ // Find & boot one device
+ deviceDesc_t firstDeviceDesc = {};
+ deviceDesc_t bootedDeviceDesc = {};
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkFindFirstSuitableDevice(X_LINK_UNBOOTED, in_deviceDesc, &firstDeviceDesc));
+ bootDevice(firstDeviceDesc, bootedDeviceDesc);
+
+ deviceDesc_t deviceDescArray[XLINK_MAX_DEVICES] = {{}};
+ unsigned int numOfFoundDevices = 0;
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkFindAllSuitableDevices(
+ X_LINK_ANY_STATE, in_deviceDesc, deviceDescArray,
+ XLINK_MAX_DEVICES, &numOfFoundDevices));
+
+ // NOTE(review): signed loop index against unsigned numOfFoundDevices; safe
+ // for realistic counts but may trigger -Wsign-compare.
+ // Presumably booted devices report platform == X_LINK_ANY_PLATFORM — that is
+ // the property this detection relies on; confirm against the driver.
+ bool foundBootedDevice = false;
+ for (int i = 0; i < numOfFoundDevices; ++i) {
+ if (deviceDescArray[i].platform == X_LINK_ANY_PLATFORM)
+ foundBootedDevice = true;
+ }
+
+ EXPECT_GE(numOfFoundDevices, 2);
+ EXPECT_TRUE(foundBootedDevice);
+ EXPECT_EQ(numOfFoundDevices, getCountSpecificDevices(X_LINK_UNBOOTED) +
+ getCountSpecificDevices(X_LINK_BOOTED));
+
+ connectAndCloseDevice(bootedDeviceDesc);
+}
+
+//------------------------------------------------------------------------------
+// XLinkResetRemoteTests
+//------------------------------------------------------------------------------
+
+// Boots and connects to a device, resets it via XLinkResetRemote, and then
+// verifies no booted device remains discoverable after the reset timeout.
+TEST_P(XLinkResetRemoteTests, CanResetRemoteDevice) {
+ if (getCountSpecificDevices(X_LINK_UNBOOTED, _protocol) == 0) {
+ GTEST_SKIP();
+ }
+
+ XLinkHandler_t handler = {0};
+ deviceDesc_t deviceDesc = {};
+ deviceDesc_t bootedDeviceDesc = {};
+
+ deviceDesc.protocol = _protocol;
+ bootDevice(deviceDesc, bootedDeviceDesc);
+ connectToDevice(bootedDeviceDesc, &handler);
+
+ // Reset device
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkResetRemote(handler.linkId));
+ std::this_thread::sleep_for(kResetTimeoutSec);
+
+ // Make sure that device is really rebooted (no longer in the booted list)
+ deviceDesc_t foundDeviceDesc = {};
+ ASSERT_EQ(X_LINK_DEVICE_NOT_FOUND,
+ XLinkFindFirstSuitableDevice(X_LINK_BOOTED, deviceDesc, &foundDeviceDesc));
+}
+
+//------------------------------------------------------------------------------
+// XLinkResetAllTests
+//------------------------------------------------------------------------------
+// Disabled: boots a device and checks that XLinkResetAll leaves no device in
+// the booted state after the reset timeout.
+TEST_P(XLinkResetAllTests, DISABLED_ResetBootedDevice) {
+ if (getCountSpecificDevices(X_LINK_UNBOOTED, _protocol) == 0) {
+ GTEST_SKIP();
+ }
+
+ deviceDesc_t deviceDesc = {};
+ deviceDesc_t bootedDeviceDesc = {};
+
+ deviceDesc.protocol = _protocol;
+ bootDevice(deviceDesc, bootedDeviceDesc);
+
+ // Try to reset device
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkResetAll());
+ std::this_thread::sleep_for(kResetTimeoutSec);
+
+ // After a global reset, no booted device of any protocol should be found
+ deviceDesc.protocol = X_LINK_ANY_PROTOCOL;
+ deviceDesc_t afterResetBootedDescr = {};
+ ASSERT_EQ(X_LINK_DEVICE_NOT_FOUND,
+ XLinkFindFirstSuitableDevice(X_LINK_BOOTED, deviceDesc, &afterResetBootedDescr));
+}
+
+//------------------------------------------------------------------------------
+// XLinkOpenStreamTests
+//------------------------------------------------------------------------------
+
+// A stream can be opened with a modest buffer size and then closed cleanly.
+TEST_P(XLinkOpenStreamTests, CanOpenAndCloseStream) {
+ streamId_t stream = XLinkOpenStream(_handlerPtr.get()->linkId, "mySuperStream", 1024);
+ ASSERT_NE(INVALID_STREAM_ID, stream);
+ ASSERT_NE(INVALID_STREAM_ID_OUT_OF_MEMORY, stream);
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkCloseStream(stream));
+}
+
+// CannotOpenStreamMoreThanMemoryOnDevice
+// Requesting a 512 MB stream must fail with the out-of-memory stream id.
+TEST_P(XLinkOpenStreamTests, CannotOpenStreamMoreThanMemoryOnDevice) {
+ const int _512MB = 512 * 1024 * 1024;
+ streamId_t stream = XLinkOpenStream(_handlerPtr.get()->linkId, "mySuperStream", _512MB);
+ ASSERT_EQ(INVALID_STREAM_ID_OUT_OF_MEMORY, stream);
+}
+
+// FIXME: the test doesn't work
+// TODO: is it correct behavior, should we accept the same names
+TEST_P(XLinkOpenStreamTests, DISABLED_CannotOpenTwoStreamsWithTheSameName) {
+ const int _1KB = 1 * 1024;
+ const char streamName[] = "mySuperStream";
+ streamId_t stream0 = XLinkOpenStream(_handlerPtr.get()->linkId, streamName, _1KB);
+ ASSERT_NE(INVALID_STREAM_ID, stream0);
+
+ streamId_t stream1 = XLinkOpenStream(_handlerPtr.get()->linkId, streamName, _1KB);
+ ASSERT_EQ(INVALID_STREAM_ID, stream1);
+
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkCloseStream(stream0));
+}
+
+// FIXME: XLinkOpenStream doesn't allocate any memory on device
+TEST_P(XLinkOpenStreamTests, DISABLED_CannotOpenStreamsMoreThanMemoryOnDevice) {
+ const int _256MB = 256 * 1024 * 1024;
+ streamId_t stream0 = XLinkOpenStream(_handlerPtr.get()->linkId, "mySuperStream0", _256MB);
+ ASSERT_NE(INVALID_STREAM_ID, stream0);
+
+ streamId_t stream1 = XLinkOpenStream(_handlerPtr.get()->linkId, "mySuperStream1", _256MB);
+ ASSERT_EQ(INVALID_STREAM_ID, stream1);
+
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkCloseStream(stream0));
+ // NOTE(review): stream1 was just asserted to be INVALID_STREAM_ID, yet is
+ // closed here expecting success — revisit if this test is re-enabled.
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkCloseStream(stream1));
+}
+
+//------------------------------------------------------------------------------
+// Initialization of XLinkCommonTests
+//------------------------------------------------------------------------------
+
+// Parameterized-suite instantiations: each suite runs over the cross product of
+// protocols and platforms given below.
+// NOTE(review): most suites pass XLinkBootTests::getTestCaseName, which
+// resolves to the inherited XLinkDeviceTestsCommon::getTestCaseName, so the
+// reuse is harmless; XLinkOpenStreamTests intentionally uses its own override
+// (it prefixes DISABLED_ when no device is attached).
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommon,
+ XLinkBootTests,
+ Combine(Values(X_LINK_USB_VSC, X_LINK_PCIE),
+ Values(X_LINK_ANY_PLATFORM)),
+ XLinkBootTests::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommon,
+ XLinkConnectTests,
+ Combine(Values(X_LINK_USB_VSC, X_LINK_PCIE),
+ Values(X_LINK_ANY_PLATFORM)),
+ XLinkBootTests::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommon,
+ XLinkFindFirstSuitableDevicePlatformTests,
+ Combine(Values(X_LINK_USB_VSC),
+ Values(X_LINK_MYRIAD_2, X_LINK_MYRIAD_X, X_LINK_ANY_PLATFORM)),
+ XLinkBootTests::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommonPCIE,
+ XLinkFindFirstSuitableDevicePlatformTests,
+ Combine(Values(X_LINK_PCIE),
+ Values(X_LINK_ANY_PLATFORM)),
+ XLinkBootTests::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommon,
+ XLinkFindFirstSuitableDeviceTests,
+ Combine(Values(X_LINK_USB_VSC, X_LINK_PCIE),
+ Values(X_LINK_ANY_PLATFORM)),
+ XLinkBootTests::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommon,
+ XLinkResetAllTests,
+ Combine(Values(X_LINK_USB_VSC, X_LINK_PCIE),
+ Values(X_LINK_ANY_PLATFORM)),
+ XLinkBootTests::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommon,
+ XLinkResetRemoteTests,
+ Combine(Values(X_LINK_USB_VSC, X_LINK_PCIE),
+ Values(X_LINK_ANY_PLATFORM)),
+ XLinkBootTests::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommon,
+ XLinkOpenStreamTests,
+ Combine(Values(X_LINK_USB_VSC, X_LINK_PCIE),
+ Values(X_LINK_ANY_PLATFORM)),
+ XLinkOpenStreamTests::getTestCaseName);
+
+
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "XLink_specific_cases.hpp"
+#include "usb_boot.h"
+#include <thread>
+
+//------------------------------------------------------------------------------
+// XLinkBootTests
+//------------------------------------------------------------------------------
+
+// Exercises the deprecated name-based API end-to-end: find a USB device by
+// name, boot it, rediscover the booted name, connect, and reset.
+TEST_F(XLinkBootUSBTests, CanBootConnectAndResetDevice_deprecated) {
+ if (getCountSpecificDevices(X_LINK_ANY_STATE, X_LINK_USB_VSC) == 0) {
+ GTEST_SKIP();
+ }
+
+ std::string firmwarePath;
+ char deviceName[XLINK_MAX_NAME_SIZE] = {0};
+ // Find device
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkGetDeviceName(0, deviceName, XLINK_MAX_NAME_SIZE));
+ ASSERT_NO_THROW(firmwarePath = getMyriadUSBFirmwarePath(deviceName));
+
+ // Boot it
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkBootRemote(deviceName, firmwarePath.c_str()));
+ // FIXME: need to find a way to avoid this sleep
+ std::this_thread::sleep_for(kBootTimeoutSec);
+
+ // Find booted (the name changes after boot, so query again)
+ char bootedDeviceName[XLINK_MAX_NAME_SIZE] = {0};
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkGetDeviceNameExtended(0, bootedDeviceName, XLINK_MAX_NAME_SIZE, DEFAULT_OPENPID));
+
+ // Connect to device
+ XLinkHandler_t handler = {};
+ handler.protocol = X_LINK_USB_VSC;
+ handler.devicePath = bootedDeviceName;
+
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkConnect(&handler));
+ std::this_thread::sleep_for(std::chrono::seconds(2));
+
+ // Reset device
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkResetRemote(handler.linkId));
+ // FIXME: need to find a way to avoid this sleep
+ std::this_thread::sleep_for(kResetTimeoutSec);
+}
--- /dev/null
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cases/XLink_specific_cases.hpp"
+#include <thread>
+
+//------------------------------------------------------------------------------
+// XLinkFindFirstSuitableDeviceUSBTests
+//------------------------------------------------------------------------------
+
+// Boots a device and verifies it can be found again by its (booted) name with
+// an X_LINK_ANY_STATE search.
+// NOTE(review): deviceDesc is passed to bootDevice zero-initialized; this
+// presumably relies on enum value 0 selecting the USB protocol — confirm
+// against XLinkProtocol_t.
+TEST_F(XLinkFindFirstSuitableDeviceUSBTests, CanFindBootedDeviceByName) {
+ if (getCountSpecificDevices(X_LINK_UNBOOTED, X_LINK_USB_VSC) == 0) {
+ GTEST_SKIP();
+ }
+
+ deviceDesc_t deviceDesc = {};
+ deviceDesc_t bootedDeviceDesc = {};
+
+ bootDevice(deviceDesc, bootedDeviceDesc);
+
+ deviceDesc_t foundDeviceDescr = {};
+ EXPECT_EQ(X_LINK_SUCCESS,
+ XLinkFindFirstSuitableDevice(X_LINK_ANY_STATE, bootedDeviceDesc, &foundDeviceDescr));
+
+ EXPECT_TRUE(strcmp(bootedDeviceDesc.name, foundDeviceDescr.name) == 0);
+
+ connectAndCloseDevice(bootedDeviceDesc);
+}
+
+//------------------------------------------------------------------------------
+// XLinkBootUSBTests
+//------------------------------------------------------------------------------
+
+// Verifies that booting moves a device from the unbooted to the booted list:
+// the original (unbooted) name must no longer be discoverable as unbooted.
+TEST_F(XLinkBootUSBTests, DeviceNameChangedAfterBoot) {
+ if (getCountSpecificDevices(X_LINK_UNBOOTED, X_LINK_USB_VSC) == 0) {
+ GTEST_SKIP();
+ }
+
+ deviceDesc_t unbootedDeviceDescr = {};
+ deviceDesc_t in_deviceDesc = {};
+ in_deviceDesc.protocol = X_LINK_USB_VSC;
+ in_deviceDesc.platform = X_LINK_ANY_PLATFORM;
+
+ // Get device name
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkFindFirstSuitableDevice(X_LINK_UNBOOTED, in_deviceDesc, &unbootedDeviceDescr));
+ std::string firmwarePath;
+ ASSERT_NO_THROW(firmwarePath = getMyriadFirmwarePath(unbootedDeviceDescr));
+
+ // Boot device
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkBoot(&unbootedDeviceDescr, firmwarePath.c_str()));
+ std::this_thread::sleep_for(kBootTimeoutSec);
+
+ // Booted device appear
+ deviceDesc_t bootedDeviceDesc = {};
+ EXPECT_EQ(X_LINK_SUCCESS,
+ XLinkFindFirstSuitableDevice(X_LINK_BOOTED, in_deviceDesc, &bootedDeviceDesc));
+
+ // The device is not in unbooted and booted list at the same time
+ deviceDesc_t foundDeviceDesc = {};
+ EXPECT_EQ(X_LINK_DEVICE_NOT_FOUND,
+ XLinkFindFirstSuitableDevice(X_LINK_UNBOOTED, unbootedDeviceDescr, &foundDeviceDesc));
+
+ connectAndCloseDevice(bootedDeviceDesc);
+}
+
+
+//------------------------------------------------------------------------------
+// XLinkFindPCIEDeviceTests
+//------------------------------------------------------------------------------
+
+// If a second PCIe device exists at index 1, it must be a distinct mxlink/mxlk
+// device, never a duplicate of the device found at index 0.
+TEST_F(XLinkPCIEDeviceTests, CannotFindSameDeviceTwice) {
+ if (getCountSpecificDevices(X_LINK_ANY_STATE, X_LINK_PCIE) == 0)
+ GTEST_SKIP();
+
+ deviceDesc_t deviceRequirements = {};
+ deviceRequirements.protocol = X_LINK_PCIE;
+ deviceRequirements.platform = X_LINK_ANY_PLATFORM;
+
+ deviceDesc_t deviceDescFirst = {};
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkFindFirstSuitableDevice(X_LINK_ANY_STATE, deviceRequirements, &deviceDescFirst));
+
+ // On index 1
+ deviceDesc_t deviceDescSecond = {};
+ XLinkError_t rc = findDeviceOnIndex(
+ 1, X_LINK_ANY_STATE, deviceRequirements, &deviceDescSecond);
+
+ // With a single card a NOT_FOUND at index 1 is the expected outcome
+ if (rc != X_LINK_DEVICE_NOT_FOUND) {
+ ASSERT_EQ(rc, X_LINK_SUCCESS);
+ ASSERT_TRUE(strstr(deviceDescFirst.name, PCIE_NAME_SUBSTR) != nullptr);
+ ASSERT_TRUE(strstr(deviceDescSecond.name, PCIE_NAME_SUBSTR) != nullptr);
+ ASSERT_TRUE(strcmp(deviceDescFirst.name, deviceDescSecond.name) != 0);
+ }
+}
+
+/**
+ * Real multi-device test requiring two PCIe cards (disabled by default).
+ * Boots the second card and expects the first to remain unbooted while the
+ * second shows up as booted.
+ */
+TEST_F(XLinkPCIEDeviceTests, DISABLED_CanFindFirstDeviceAfterBootSecond) {
+ if (getCountSpecificDevices(X_LINK_ANY_STATE, X_LINK_PCIE) == 0)
+ GTEST_SKIP();
+
+ // TODO Add check that there two devices
+ deviceDesc_t deviceRequirements = {};
+ deviceRequirements.protocol = X_LINK_PCIE;
+ deviceRequirements.platform = X_LINK_ANY_PLATFORM;
+
+ // Find first device
+ deviceDesc_t firstDeviceDesc = {};
+ ASSERT_EQ(X_LINK_SUCCESS, findDeviceOnIndex(
+ 0, X_LINK_ANY_STATE, deviceRequirements, &firstDeviceDesc));
+
+ // Find second device
+ deviceDesc_t secondDeviceDesc = {};
+ ASSERT_EQ(X_LINK_SUCCESS, findDeviceOnIndex(
+ 1, X_LINK_ANY_STATE, deviceRequirements, &secondDeviceDesc));
+
+ // Boot second device
+ std::string firmwarePath;
+ ASSERT_NO_THROW(firmwarePath = getMyriadFirmwarePath(deviceRequirements));
+
+ EXPECT_EQ(X_LINK_SUCCESS, XLinkBoot(&secondDeviceDesc, firmwarePath.c_str()));
+ std::this_thread::sleep_for(kBootTimeoutSec);
+
+ // Check that first still in unbooted state
+ deviceDesc_t firstDeviceDescAfter = {};
+ firstDeviceDescAfter.protocol = X_LINK_PCIE;
+ firstDeviceDescAfter.platform = X_LINK_ANY_PLATFORM;
+
+
+ EXPECT_EQ(X_LINK_SUCCESS, findDeviceOnIndex(
+ 0, X_LINK_UNBOOTED, firstDeviceDesc, &firstDeviceDescAfter));
+
+ // Check that second device now in booted state
+ deviceDesc_t secondDeviceDescAfter = {};
+ secondDeviceDescAfter.protocol = X_LINK_PCIE;
+ secondDeviceDescAfter.platform = X_LINK_ANY_PLATFORM;
+
+ EXPECT_EQ(X_LINK_SUCCESS, findDeviceOnIndex(
+ 0, X_LINK_BOOTED, secondDeviceDesc, &secondDeviceDescAfter));
+
+ // TODO Move it to separate function
+ // Close second device
+ XLinkHandler_t handler = {0};
+ handler.protocol = secondDeviceDesc.protocol;
+ handler.devicePath = secondDeviceDesc.name;
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkConnect(&handler));
+
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkResetRemote(handler.linkId));
+ std::this_thread::sleep_for(kResetTimeoutSec);
+}
--- /dev/null
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+
+#include "XLink_common_cases.hpp"
+
+#include <thread>
+
+// Shared global handler passed to XLinkInitialize; lives for the whole test run.
+static XLinkGlobalHandler_t globalHandler;
+
+//------------------------------------------------------------------------------
+// Implementation of methods of class XLinkTests
+//------------------------------------------------------------------------------
+
+// One-time suite setup: initializes the XLink library and waits briefly for it
+// to come up before any test runs.
+void XLinkTests::SetUpTestCase() {
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkInitialize(&globalHandler));
+
+ // Deprecated field usage. Begin.
+ // NOTE(review): protocol is assigned after XLinkInitialize has already read
+ // the handler — if the deprecated field matters it presumably needs to be
+ // set before the call; confirm against the XLink API.
+ globalHandler.protocol = USB_VSC;
+ // Deprecated field usage. End.
+
+ // Waiting for initialization
+ std::this_thread::sleep_for(std::chrono::seconds(1));
+}
+
+//------------------------------------------------------------------------------
+// Implementation of methods of class XLinkDeviceTestsCommon
+//------------------------------------------------------------------------------
+
+// Maps a protocol enum to the short tag used in generated test names.
+// Anything other than USB/PCIE collapses to "ANY".
+static std::string protocolToString(XLinkProtocol_t protocol) {
+ switch (protocol) {
+ case X_LINK_USB_VSC:
+ return std::string("USB");
+ case X_LINK_PCIE:
+ return std::string("PCIE");
+ default:
+ return std::string("ANY");
+ }
+}
+
+// Maps a platform enum to the short tag used in generated test names.
+// Anything other than Myriad2/MyriadX collapses to "ANY".
+static std::string platformToString(XLinkPlatform_t platform) {
+ switch (platform) {
+ case X_LINK_MYRIAD_2:
+ return std::string("Myriad2");
+ case X_LINK_MYRIAD_X:
+ return std::string("MyriadX");
+ default:
+ return std::string("ANY");
+ }
+}
+
+// Builds the parameterized test-case suffix, e.g. "protocol=USB_platform=ANY".
+std::string XLinkDeviceTestsCommon::getTestCaseName(
+ const TestParamInfo<XLinkDeviceTestsCommonParam::ParamType>& param) {
+ XLinkProtocol_t protocol = get<0>(param.param);
+ XLinkPlatform_t platform = get<1>(param.param);
+
+ return "protocol=" + protocolToString(protocol) +
+ "_platform=" + platformToString(platform);
+}
+
+// Caches the current (protocol, platform) parameter pair for use by the tests.
+void XLinkDeviceTestsCommon::SetUp() {
+ _protocol = get<0>(XLinkDeviceTestsCommonParam::GetParam());
+ _platform = get<1>(XLinkDeviceTestsCommonParam::GetParam());
+}
+
+//------------------------------------------------------------------------------
+// Implementation of methods of class XLinkOpenStreamUSBTests
+//------------------------------------------------------------------------------
+
+// Like the base name generator, but prefixes DISABLED_ when no unbooted device
+// of the requested protocol is attached, so the whole case is skipped by gtest.
+std::string XLinkOpenStreamTests::getTestCaseName(
+ const TestParamInfo<XLinkDeviceTestsCommonParam::ParamType>& param) {
+ const auto name = XLinkDeviceTestsCommon::getTestCaseName(param);
+
+ XLinkProtocol_t protocol = get<0>(param.param);
+ if (getCountSpecificDevices(X_LINK_UNBOOTED, protocol) == 0) {
+ return "DISABLED_" + name;
+ }
+
+ return name;
+}
+
+// Allocates the handler up front; it is populated during SetUp's connect.
+XLinkOpenStreamTests::XLinkOpenStreamTests() : _handlerPtr(new XLinkHandler_t()) {
+}
+
+// Per-test setup: boot a device matching the (protocol, platform) parameters
+// and open a connection to it.
+void XLinkOpenStreamTests::SetUp() {
+ XLinkDeviceTestsCommon::SetUp();
+
+ _deviceDesc.protocol = _protocol;
+ _deviceDesc.platform = _platform;
+
+ XLinkTestsHelper::bootDevice(_deviceDesc, _bootedDesc);
+ XLinkTestsHelper::connectToDevice(_bootedDesc, _handlerPtr.get());
+}
+
+// Per-test teardown: reset the connected device and verify it disappeared.
+void XLinkOpenStreamTests::TearDown() {
+ XLinkTestsHelper::closeDevice(_handlerPtr.get());
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+
+#pragma once
+
+#include "XLink.h"
+#include "XLink_tests_helpers.hpp"
+
+#include "gtest/gtest.h"
+#include <memory>
+
+using namespace ::testing;
+// Parameter pack shared by all device-parameterized suites: (protocol, platform).
+using XLinkDeviceTestsCommonParam = WithParamInterface<std::tuple<XLinkProtocol_t, XLinkPlatform_t>>;
+
+//------------------------------------------------------------------------------
+// class XLinkTests
+// Root fixture: initializes the XLink library once per suite and mixes in the
+// device/firmware helper methods.
+//------------------------------------------------------------------------------
+class XLinkTests : public ::testing::Test,
+ protected XLinkTestsHelper {
+public:
+ static void SetUpTestCase();
+};
+
+//------------------------------------------------------------------------------
+// class XLinkNullPtrTests
+//------------------------------------------------------------------------------
+class XLinkNullPtrTests: public XLinkTests {};
+
+//------------------------------------------------------------------------------
+// class XLinkFindAllSuitableDevicesTests
+//------------------------------------------------------------------------------
+
+class XLinkFindAllSuitableDevicesTests : public XLinkTests {};
+
+
+//------------------------------------------------------------------------------
+// class XLinkCommonTests
+// Base for suites parameterized over (protocol, platform); caches the current
+// parameters in _protocol/_platform during SetUp.
+//------------------------------------------------------------------------------
+class XLinkDeviceTestsCommon : public XLinkTests,
+ public XLinkDeviceTestsCommonParam {
+public:
+ //Operations
+ static std::string getTestCaseName(
+ const TestParamInfo<XLinkDeviceTestsCommonParam::ParamType>& param);
+
+ void SetUp() override;
+
+protected:
+ XLinkProtocol_t _protocol;
+ XLinkPlatform_t _platform;
+};
+
+//------------------------------------------------------------------------------
+// class XLinkBootUSBTests
+//------------------------------------------------------------------------------
+
+class XLinkBootTests : public XLinkDeviceTestsCommon {};
+
+//------------------------------------------------------------------------------
+// class XLinkConnectTests
+//------------------------------------------------------------------------------
+class XLinkConnectTests : public XLinkDeviceTestsCommon {};
+
+//------------------------------------------------------------------------------
+// class XLinkFindFirstSuitableDeviceTests
+//------------------------------------------------------------------------------
+
+class XLinkFindFirstSuitableDeviceTests : public XLinkDeviceTestsCommon {};
+
+//------------------------------------------------------------------------------
+// class XLinkFindFirstSuitableBootedDeviceTests
+//------------------------------------------------------------------------------
+
+class XLinkFindFirstSuitableDevicePlatformTests : public XLinkDeviceTestsCommon {};
+
+//------------------------------------------------------------------------------
+// class XLinkResetRemoteTests
+//------------------------------------------------------------------------------
+class XLinkResetRemoteTests : public XLinkDeviceTestsCommon {};
+
+//------------------------------------------------------------------------------
+// class XLinkResetAllTests
+//------------------------------------------------------------------------------
+class XLinkResetAllTests : public XLinkDeviceTestsCommon {};
+
+
+
+//------------------------------------------------------------------------------
+// class XLinkOpenStreamTests
+// Boots and connects to a device in SetUp, resets it in TearDown; overrides
+// getTestCaseName to disable the whole case when no device is attached.
+//------------------------------------------------------------------------------
+
+class XLinkOpenStreamTests : public XLinkDeviceTestsCommon {
+public:
+ //Operations
+ static std::string getTestCaseName(
+ const TestParamInfo<XLinkDeviceTestsCommonParam::ParamType>& param);
+
+protected:
+ XLinkOpenStreamTests();
+
+ void SetUp() override;
+ void TearDown() override;
+
+protected:
+ std::unique_ptr<XLinkHandler_t> _handlerPtr;
+ deviceDesc_t _deviceDesc = {};
+ deviceDesc_t _bootedDesc = {};
+};
--- /dev/null
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "XLink_common_cases.hpp"
+
+//------------------------------------------------------------------------------
+// class XLinkBootUSBTests
+// USB-specific, non-parameterized suites built on the common XLinkTests fixture.
+//------------------------------------------------------------------------------
+
+class XLinkBootUSBTests : public XLinkTests {};
+
+//------------------------------------------------------------------------------
+// class XLinkFindFirstSuitableDeviceUSBTests
+//------------------------------------------------------------------------------
+
+class XLinkFindFirstSuitableDeviceUSBTests : public XLinkTests {};
+
+//------------------------------------------------------------------------------
+// class XLinkFindPCIEDeviceTests
+//------------------------------------------------------------------------------
+
+class XLinkPCIEDeviceTests: public XLinkTests {};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "XLink_tests_helpers.hpp"
+#include <thread>
+
+//------------------------------------------------------------------------------
+// Implementation of methods of class XLinkTestsHelpersBoot
+//------------------------------------------------------------------------------
+
+// Finds an unbooted device matching in_deviceDesc, boots it with the matching
+// firmware, and returns the descriptor of the resulting booted device.
+// Fails the calling test (via ASSERT_*) on any step.
+void XLinkTestsHelper::bootDevice(const deviceDesc_t& in_deviceDesc, deviceDesc_t& out_bootedDeviceDesc) {
+ deviceDesc_t tmp_deviceDesc = {};
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkFindFirstSuitableDevice(X_LINK_UNBOOTED, in_deviceDesc, &tmp_deviceDesc));
+
+ std::string firmwarePath;
+ ASSERT_NO_THROW(firmwarePath = getMyriadFirmwarePath(tmp_deviceDesc));
+ printf("Would boot (%s) device with firmware (%s) \n", tmp_deviceDesc.name, firmwarePath.c_str());
+
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkBoot(&tmp_deviceDesc, firmwarePath.c_str()));
+ // FIXME: need to find a way to avoid this sleep
+ std::this_thread::sleep_for(kBootTimeoutSec);
+
+ // Check, that device booted (name is cleared because it changes after boot)
+ tmp_deviceDesc.platform = X_LINK_ANY_PLATFORM;
+ memset(tmp_deviceDesc.name, 0, XLINK_MAX_NAME_SIZE);
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkFindFirstSuitableDevice(X_LINK_BOOTED, tmp_deviceDesc, &out_bootedDeviceDesc));
+}
+
+// Connects to an already-booted device, filling out_handler with the protocol
+// and device path. out_handler must be non-null; the test fails otherwise.
+// NOTE: out_handler->devicePath aliases in_bootedDeviceDesc.name, so the
+// descriptor must outlive the handler.
+void XLinkTestsHelper::connectToDevice(deviceDesc_t& in_bootedDeviceDesc, XLinkHandler_t* out_handler) {
+ if (!out_handler){
+ GTEST_FAIL();
+ }
+
+ memset(out_handler, 0, sizeof(XLinkHandler_t));
+ out_handler->protocol = in_bootedDeviceDesc.protocol;
+ out_handler->devicePath = in_bootedDeviceDesc.name;
+
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkConnect(out_handler));
+}
+
+// Resets the device behind an open handler and verifies it left the booted
+// list after the reset timeout.
+void XLinkTestsHelper::closeDevice(XLinkHandler_t* handler) {
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkResetRemote(handler->linkId));
+ std::this_thread::sleep_for(kResetTimeoutSec);
+
+ // Make sure that device is closed
+ deviceDesc_t deviceDesc = {};
+ deviceDesc.protocol = handler->protocol;
+ deviceDesc.platform = X_LINK_ANY_PLATFORM;
+ // NOTE(review): unchecked strcpy — assumes devicePath is shorter than the
+ // name buffer (XLINK_MAX_NAME_SIZE); confirm for all callers.
+ strcpy(deviceDesc.name, handler->devicePath);
+
+ deviceDesc_t foundDeviceDesc = {};
+ ASSERT_EQ(X_LINK_DEVICE_NOT_FOUND,
+ XLinkFindFirstSuitableDevice(X_LINK_BOOTED, deviceDesc, &foundDeviceDesc));
+}
+
+// Convenience wrapper: connect to a booted device, then reset and verify close.
+void XLinkTestsHelper::connectAndCloseDevice(deviceDesc_t& in_bootedDeviceDesc) {
+ XLinkHandler_t handler = {0};
+
+ connectToDevice(in_bootedDeviceDesc, &handler);
+ closeDevice(&handler);
+}
+
+// Picks the USB firmware image by device name: names containing "ma2480" get
+// the ma2x8x image, everything else falls back to ma2450. Throws
+// std::invalid_argument when the name lacks the '-' of a device address.
+std::string XLinkTestsHelper::getMyriadUSBFirmwarePath(const std::string& deviceName) {
+ if (deviceName.find('-') == std::string::npos) {
+ throw std::invalid_argument("Invalid device address");
+ }
+
+ if (deviceName.find("ma2480") != std::string::npos) {
+ return FIRMWARE_SUBFOLDER + std::string("usb-ma2x8x.mvcmd");
+ }
+
+ return FIRMWARE_SUBFOLDER + std::string("usb-ma2450.mvcmd");
+}
+
+// Resolves the firmware path for a device descriptor. PCIe devices get a
+// platform-specific image (.elf on Windows, .mvcmd elsewhere); USB devices
+// defer to getMyriadUSBFirmwarePath. Throws std::invalid_argument for any
+// other protocol.
+std::string XLinkTestsHelper::getMyriadFirmwarePath(const deviceDesc_t& in_deviceDesc) {
+ if(in_deviceDesc.protocol != X_LINK_USB_VSC &&
+ in_deviceDesc.protocol != X_LINK_PCIE) {
+ throw std::invalid_argument("Device protocol must be specified");
+ }
+
+ if(in_deviceDesc.protocol == X_LINK_PCIE) {
+#if defined(_WIN32)
+ return FIRMWARE_SUBFOLDER + std::string("pcie-ma248x.elf");
+#else
+ return FIRMWARE_SUBFOLDER + std::string("pcie-ma248x.mvcmd");
+#endif
+ }
+
+ return getMyriadUSBFirmwarePath(in_deviceDesc.name);
+}
+
+// Enumerates all devices matching the requirements in the given state and
+// copies the descriptor at `index` into out_foundDevicesPtr.
+// Returns X_LINK_DEVICE_NOT_FOUND when fewer than index+1 devices exist, or
+// propagates the enumeration error.
+XLinkError_t XLinkTestsHelper::findDeviceOnIndex(
+ const int index,
+ const XLinkDeviceState_t deviceState,
+ const deviceDesc_t in_deviceRequirements,
+ deviceDesc_t *out_foundDevicesPtr) {
+
+ deviceDesc_t deviceDescArray[XLINK_MAX_DEVICES] = {};
+ unsigned int foundDevices = 0;
+ XLinkError_t rc = XLinkFindAllSuitableDevices(
+ deviceState, in_deviceRequirements, deviceDescArray, XLINK_MAX_DEVICES, &foundDevices);
+
+ if (rc != X_LINK_SUCCESS) {
+ return rc;
+ }
+
+ // NOTE(review): unsigned/int comparison — a negative index would be
+ // converted to a huge unsigned value; callers only pass 0/1 today.
+ if (foundDevices <= index) {
+ return X_LINK_DEVICE_NOT_FOUND;
+ }
+
+ out_foundDevicesPtr->platform = deviceDescArray[index].platform;
+ out_foundDevicesPtr->protocol = deviceDescArray[index].protocol;
+ // NOTE(review): strncpy does not null-terminate when the source fills the
+ // whole buffer — assumes names are always shorter than XLINK_MAX_NAME_SIZE;
+ // confirm against the driver's name generation.
+ strncpy(out_foundDevicesPtr->name, deviceDescArray[index].name, XLINK_MAX_NAME_SIZE);
+ return X_LINK_SUCCESS;
+}
+
+// Counts devices matching the given state/protocol/platform filter.
+// NOTE(review): the XLinkFindAllSuitableDevices return code is ignored — on
+// failure this silently reports 0 devices; also the unsigned count is
+// returned through an int.
+int XLinkTestsHelper::getCountSpecificDevices(const XLinkDeviceState_t state,
+ const XLinkProtocol_t deviceProtocol,
+ const XLinkPlatform_t devicePlatform) {
+ deviceDesc_t req_deviceDesc = {};
+ req_deviceDesc.protocol = deviceProtocol;
+ req_deviceDesc.platform = devicePlatform;
+
+ deviceDesc_t deviceDescArray[XLINK_MAX_DEVICES] = {};
+ unsigned int foundDevices = 0;
+ XLinkFindAllSuitableDevices(
+ state, req_deviceDesc, deviceDescArray, XLINK_MAX_DEVICES, &foundDevices);
+
+ return foundDevices;
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "XLink.h"
+#include "XLinkPrivateDefines.h"
+
+#include <gtest/gtest.h>
+#include <chrono>
+#include <string>
+
+//------------------------------------------------------------------------------
+// Defines
+//------------------------------------------------------------------------------
+
+using std_seconds = std::chrono::seconds;
+
+// Platform-specific PCIe device-name substring and firmware folder layout.
+#if (defined(_WIN32) || defined(_WIN64))
+ static constexpr char PCIE_NAME_SUBSTR[] = "mxlink";
+ static constexpr char FIRMWARE_SUBFOLDER[] = "./";
+#else
+ static constexpr char PCIE_NAME_SUBSTR[] = "mxlk";
+ static constexpr char FIRMWARE_SUBFOLDER[] = "./lib/";
+#endif
+
+//------------------------------------------------------------------------------
+// Helpers
+//------------------------------------------------------------------------------
+
+// User-defined literal so timeouts read as e.g. 2_sec.
+constexpr std::chrono::seconds operator "" _sec(unsigned long long s)
+{
+    return std::chrono::seconds(s);
+}
+
+//------------------------------------------------------------------------------
+// class XLinkTestsHelper
+// Mix-in with device-management, firmware-lookup and enumeration helpers
+// shared by all XLink test fixtures.
+//------------------------------------------------------------------------------
+
+class XLinkTestsHelper {
+public:
+ // Empirical waits after boot/reset; see FIXMEs at the call sites.
+ const std_seconds kBootTimeoutSec = 2_sec;
+ const std_seconds kResetTimeoutSec = 5_sec;
+
+ // Platform markers embedded in unbooted USB device names.
+ const std::string kUSBMyriadX = "ma2480";
+ const std::string kUSBMyriad2 = "ma2450";
+
+ // Device management
+ void bootDevice(const deviceDesc_t& in_deviceDesc, deviceDesc_t& out_bootedDeviceDesc);
+
+ void connectToDevice(deviceDesc_t& in_bootedDeviceDesc, XLinkHandler_t* out_handler);
+ void closeDevice(XLinkHandler_t* handler);
+
+ void connectAndCloseDevice(deviceDesc_t& in_bootedDeviceDesc);
+
+ // Firmware
+ std::string getMyriadUSBFirmwarePath(const std::string& deviceName);
+ std::string getMyriadFirmwarePath(const deviceDesc_t& in_deviceDesc);
+
+ // Device searching
+ XLinkError_t findDeviceOnIndex(const int index,
+ const XLinkDeviceState_t deviceState,
+ const deviceDesc_t in_deviceRequirements,
+ deviceDesc_t *out_foundDevicesPtr);
+
+
+ static int getCountSpecificDevices(const XLinkDeviceState_t state = X_LINK_ANY_STATE,
+ const XLinkProtocol_t deviceProtocol = X_LINK_ANY_PROTOCOL,
+ const XLinkPlatform_t devicePlatform = X_LINK_ANY_PLATFORM);
+};
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_tool
 DESTINATION deployment_tools/tools
 COMPONENT python_tools)
# NOTE(review): dropped an added `install(FILES COMPONENT python_tools)` call —
# install(FILES) requires at least one file plus a DESTINATION (or TYPE), so
# the empty form fails at configure time. Re-add it with actual file arguments
# when files to install are known.
endif()
--- /dev/null
+# Cross Check Tool
+
+Cross Check Tool is a console application that enables comparing accuracy and performance metrics for two successive
+model inferences that are performed on two different supported Intel® devices or with different precisions.
+The Cross Check Tool can compare the metrics per layer or all over the model.
+
+## Running the Cross Check Tool
+
+Cross Check Tool is distributed as a Python module and there is no need to build it. To run the Cross Check Tool,
+execute the `cross_check_tool.py` file with necessary parameters. Please note that the Inference Engine assumes that weights
+are in the same folder as the `.xml` file.
+
+You can get the list of all available options using the `-h` option:
+
+```sh
+$ python3 cross_check_tool.py -h
+
+Cross Check Tool is a console application that enables comparing accuracy and
+provides performance metrics
+
+optional arguments:
+ -h, --help show this help message and exit
+
+Model specific arguments:
+ --input INPUT, -i INPUT
+ Path to an input image file or multi-input file to
+ infer. Generates input(s) from normal distribution if
+ empty
+ --batch BATCH, -b BATCH
+ Overrides batch size. Default is inherited from model
+ --model MODEL, -m MODEL
+ Path to an .xml file that represents the first IR of
+ the trained model to infer.
+ --reference_model REFERENCE_MODEL, -ref_m REFERENCE_MODEL
+ Path to an .xml file that represents the second IR in
+ different precision to compare the metrics.
+ --layers LAYERS, -layers LAYERS
+ Defines layers to check. Options: all, None - for
+ output layers check, list of comma-separated layer
+ names to check. Default value is None.
+ --mapping MAPPING, -map MAPPING
+ Model Optimizer provided mapping for --model/-m
+ --reference_mapping REFERENCE_MAPPING, -ref_map REFERENCE_MAPPING
+ Model Optimizer provided mapping for
+ --reference_model/-ref_model
+ --num_of_iterations NUM_OF_ITERATIONS, -ni NUM_OF_ITERATIONS
+ Number of iterations to collect all over the net
+ performance
+
+Plugin specific arguments:
+ --plugin_path PLUGIN_PATH, -pp PLUGIN_PATH
+ Path to a plugin folder.
+ --device DEVICE, -d DEVICE
+ The first target device to infer the model specified
+ with the -m or --model option. CPU, GPU, HDDL or
+ MYRIAD are acceptable.
+ --config CONFIG, -conf CONFIG
+ Path to config file for -d or -device device plugin
+ --reference_device REFERENCE_DEVICE, -ref_d REFERENCE_DEVICE
+ The second target device to infer the model and
+ compare the metrics. CPU, GPU, HDDL or MYRIAD are
+ acceptable.
+ --reference_config REFERENCE_CONFIG, -ref_conf REFERENCE_CONFIG
+ Path to config file for -ref_d or -reference_device
+ device plugin
+ -l L Required for MKLDNN (CPU)-targeted custom layers.
+ Comma separated paths to a shared libraries with the
+ kernels implementation.
+
+CCT mode arguments:
+ --dump Enables blobs statistics dumping
+ --load LOAD Path to a file to load blobs from
+
+```
+### Examples
+
+1. To check per-layer accuracy and performance of inference in FP32 precision on the CPU against the GPU, run:
+ ```sh
+ $python3 cross_check_tool.py -i <path_to_input_image_or_multi_input_file> \
+ -m <path_to_FP32_xml> \
+ -d GPU \
+ -ref_d CPU \
+ --layers all
+ ```
+
+ The output looks as follows:
+ ```sh
+ [ INFO ] Cross check with one IR was enabled
+ [ INFO ] GPU:FP32 vs CPU:FP32
+ [ INFO ] The same IR on both devices: <path_to_IR>
+ [ INFO ] Statistics will be dumped for X layers: <layer_1_name>, <layer_2_name>, ... , <layer_X_name>
+ [ INFO ] Layer <layer_1_name> statistics
+ Max absolute difference : 1.15204E-03
+ Min absolute difference : 0.0
+ Max relative difference : 1.15204E+17
+ Min relative difference : 0.0
+ Min reference value : -1.69513E+03
+ Min absolute reference value : 2.71080E-06
+ Max reference value : 1.17132E+03
+ Max absolute reference value : 1.69513E+03
+ Min actual value : -1.69513E+03
+ Min absolute actual value : 8.66465E-05
+ Max actual value : 1.17132E+03
+ Max absolute actual value : 1.69513E+03
+ Device: -d GPU -ref_d CPU
+ Status: OPTIMIZED_OUT OPTIMIZED_OUT
+ Layer type: Convolution Convolution
+ Real time, microsec: 0 120
+ Number of NAN: 0 0
+ Number of INF: 0 0
+ Number of ZERO: 0 0
+ ...
+ <list_of_layer_statistics>
+ ...
+
+ [ INFO ] Overall max absolute difference = 0.00115203857421875
+ [ INFO ] Overall min absolute difference = 0.0
+ [ INFO ] Overall max relative difference = 1.1520386483093504e+17
+ [ INFO ] Overall min relative difference = 0.0
+ [ INFO ] Execution successful
+ ```
+
+2. To check the overall accuracy and performance of inference on the CPU in FP32 precision against the
+ Intel® Movidius™ Myriad™ device in FP16 precision, run:
+ ```sh
+ $python3 cross_check_tool.py -i <path_to_input_image_or_multi_input_file> \
+ -m <path_to_FP16_xml> \
+ -d MYRIAD \
+ -ref_m <path_to_FP32_xml> \
+ -ref_d CPU
+ ```
+
+ The output looks as follows:
+ ```sh
+ [ INFO ] Cross check with two IRs was enabled
   [ INFO ] MYRIAD:FP16 vs CPU:FP32
+ [ INFO ] IR for MYRIAD : <path_to_FP16_xml>
+ [ INFO ] IR for CPU : <path_to_FP32_xml>
+ [ INFO ] Statistics will be dumped for 1 layer: <output_layer_name(s)>
+ [ INFO ] Layer <output_layer_name> statistics
+ Max absolute difference : 2.32944E-02
+ Min absolute difference : 3.63002E-13
+ Max relative difference : 6.41717E+10
+ Min relative difference : 1.0
+ Min reference value : 3.63002E-13
+ Min absolute reference value : 3.63002E-13
+ Max reference value : 7.38138E-01
+ Max absolute reference value : 7.38138E-01
+ Min actual value : 0.0
+ Min absolute actual value : 0.0
+ Max actual value : 7.14844E-01
+ Max absolute actual value : 7.14844E-01
+ Device: -d MYRIAD -ref_d CPU
+ Status: OPTIMIZED_OUT OPTIMIZED_OUT
+ Layer type: Reshape Reshape
+ Real time, microsec: 0 0
+ Number of NAN: 0 0
+ Number of INF: 0 0
+ Number of ZERO: 0 0
+ ----------------------------------------------------------------------
+ Overall performance, microseconds: 2.79943E+05 6.24670E+04
+ ----------------------------------------------------------------------
+ [ INFO ] Overall max absolute difference = 0.023294448852539062
+ [ INFO ] Overall min absolute difference = 3.630019191052519e-13
+ [ INFO ] Overall max relative difference = 64171696128.0
+ [ INFO ] Overall min relative difference = 1.0
+ [ INFO ] Execution successful
+ ```
+
+3. To dump layer statistics from a specific list of layers, run:
+ ```sh
+ $python3 cross_check_tool.py -i <path_to_input_image_or_multi_input_file> \
+ -m <path_to_FP16_xml> \
+ -d MYRIAD \
+ --dump \
+ --layers <comma_separated_list_of_layers>
+ ```
+
+ The output looks as follows:
+ ```sh
+ [ INFO ] Dump mode was enabled
+ [ INFO ] <layer_1_name> layer processing
+ ...
+ [ INFO ] <layer_X_name> layer processing
+ [ INFO ] Dump file path: <path_where_dump_will_be_saved>
+ [ INFO ] Execution successful
+ ```
+
+ If you do not provide the `-i` key, the Cross Check Tool generates an input from normal distributed noise and saves
+ it in a multi-input file format with the filename `<path_to_xml>_input_layers_dump.txt` in the same folder as the Intermediate Representation (IR).
+
+4. To check the overall accuracy and performance of inference on the CPU in FP32 precision against dumped results, run:
+ ```sh
+ $python3 cross_check_tool.py -i <path_to_input_image_or_multi_input_file> \
+ -m <path_to_FP32_xml> \
+ -d CPU \
+ --load <path_to_dump> \
+ --layers all
+ ```
+
+ The output looks as follows:
+ ```sh
+ [ INFO ] Load mode was enabled
+ [ INFO ] IR for CPU : <path_to_FP32_xml>
+ [ INFO ] Loading blob from /localdisk/models/FP16/icv_squeezenet_v1.0.xml_GPU_dump.npz
+ [ INFO ] Statistics will be dumped for X layers: <layer_1_name>, <layer_2_name>, ... , <layer_X_name>
+ [ INFO ] Layer <layer_1_name> statistics
+ Max absolute difference : 0.0
+ Min absolute difference : 0.0
+ Max relative difference : 0.0
+ Min relative difference : 0.0
+ Min reference value : 0.0
+ Min absolute reference value : 0.0
+ Max reference value : 7.14844E-01
+ Max absolute reference value : 7.14844E-01
+ Min actual value : 0.0
+ Min absolute actual value : 0.0
+ Max actual value : 7.14844E-01
+ Max absolute actual value : 7.14844E-01
+ Device: -d CPU -load GPU
+ Status: OPTIMIZED_OUT OPTIMIZED_OUT
+ Layer type: Reshape Reshape
+ Real time, microsec: 0 0
+ Number of NAN: 0 0
+ Number of INF: 0 0
+ Number of ZERO: 609 699
+
+ ...
+ <list_of_layer_statistics>
+ ...
+
+ [ INFO ] Overall max absolute difference = 0.0
+ [ INFO ] Overall min absolute difference = 0.0
+ [ INFO ] Overall max relative difference = 0.0
+ [ INFO ] Overall min relative difference = 0.0
+ [ INFO ] Execution successful
+ ```
+
+### Multi-input and dump file format
+
+Multi-input and dump file is a numpy compressed `.npz` file with hierarchy:
+
+```sh
{
    'layer_name': {
        'blob': np.array([...]),
        'pc': {
            'device': 'device_name',
            'real_time': int_real_time_in_microseconds_from_plugin,
            'exec_type': 'exec_type_from_plugin',
            'layer_type': 'layer_type_from_plugin',
            'status': 'status_from_plugin'
        }
    },
    'another_layer_name': {
        'blob': np.array([...]),
        'pc': {
            'device': 'device_name',
            'real_time': int_real_time_in_microseconds_from_plugin,
            'exec_type': 'exec_type_from_plugin',
            'layer_type': 'layer_type_from_plugin',
            'status': 'status_from_plugin'
        }
    },
    ...
}
+```
+
+### Configuration file
+
There is an option to pass a configuration file to the plugin by providing
the `--config` and/or `--reference_config` keys.

A configuration file is a text file containing key-value pairs, one pair per line.

Structure of the configuration file:
+
+```sh
+KEY VALUE
+ANOTHER_KEY ANOTHER_VALUE,VALUE_1
+```
--- /dev/null
+"""
+Copyright (C) 2018-2020 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
--- /dev/null
+# Copyright (C) 2018-2019 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import datetime
+import logging as log
+import os
+import sys
+
+import numpy as np
+
+try:
+ from openvino import inference_engine as ie
+ from openvino.inference_engine import IENetwork, IECore
+except Exception as e:
+ exception_type = type(e).__name__
+ print("The following error happened while importing Python API module:\n[ {} ] {}".format(exception_type, e))
+ sys.exit(1)
+
+from utils import get_config_dictionary, get_layers_list, print_output_layers, input_processing, \
+ accuracy_metrics, validate_args, build_parser, set_logger, find_out_cct_mode, print_all_over_the_net_metrics, \
+ update_global_accuracy_matrics, blob_counters, performance_metrics, manage_user_outputs_with_mapping, \
+ dump_output_file, load_dump, error_handling, print_input_layers, set_verbosity
+
+
+###
+# PLUGIN
+###
+
+
# Fixed: the error description referenced '{plugin.device}', but this function has no
# 'plugin' keyword argument (only core/device/config), so desc.format(**kwargs) raised
# a KeyError inside the error handler, masking the original failure.
@error_handling('plugin of \'{device}\' device config \'{config}\' loading')
def set_plugin_config(core: IECore, device: str, config: str = None):
    """Apply the key-value pairs parsed from the `config` file path to `device` on the core."""
    core.set_config(get_config_dictionary(config_file=config), device_name=device)
+
+
@error_handling('\'{cpu_ext}\' cpu extensions loading')
def set_cpu_extensions(core: IECore, cpu_ext: str):
    """Register a CPU custom-layer extensions library (path `cpu_ext`) with the IE core."""
    core.add_extension(cpu_ext, "CPU")
+
+
def get_plugin(device: str, cpu_ext: str = None, config: str = None):
    """Create an IECore, apply the device config and optional CPU extensions, and return it.

    :param device: target device name (e.g. 'CPU', 'GPU'); extensions are only loaded for CPU
    :param cpu_ext: optional path to a CPU custom-layers extensions library
    :param config: optional path to a plugin configuration file
    """
    # Renamed the local from `ie` to `core`: the old name shadowed the module-level
    # `openvino.inference_engine as ie` import, which was confusing and blocked access
    # to the module inside this function. Dead commented-out logging was removed.
    core = IECore()
    set_plugin_config(core=core, device=device, config=config)
    if cpu_ext and 'CPU' in device:
        set_cpu_extensions(core=core, cpu_ext=cpu_ext)
    return core
+
+
+###
+# MODEL
+###
+
+
@error_handling('reading {model} IR model')
def get_net(model: str, core: IECore):
    """Read an IR into an IENetwork; weights (.bin) are assumed to sit next to the .xml."""
    weights = os.path.splitext(model)[0] + ".bin"
    return core.read_network(model=model, weights=weights)
+
+
# Fixed: the error description referenced '{plugin.device}', but this function's kwargs
# are core/net/device, so formatting the message raised KeyError on the error path.
@error_handling('loading network to plugin of {device} device')
def get_exec_net(core, net, device):
    """Load `net` onto `device` via `core` and return the resulting executable network."""
    return core.load_network(network=net, device_name=device)
+
+
@error_handling('output \'{output}\' addition for network from model \'{model}\'')
def get_net_copy_with_output(model: str, output: str, core: IECore):
    """Re-read the IR from disk; unless `output` is None/'None', mark it as an extra network output."""
    net_copy = get_net(model=model, core=core)
    if output is not None and output != 'None':
        net_copy.add_outputs(output)
    return net_copy
+
+
@error_handling('getting model layers info')
def get_model_info(net: IENetwork):
    """Return (layers, inputs, outputs, precision).

    Precision is taken from the first output blob of the network's first layer —
    presumably uniform across the model, as only one value is reported.
    """
    layers = net.layers
    first_layer = next(iter(layers.values()))
    precision = first_layer.out_data[0].precision
    return layers, net.inputs, net.outputs, precision
+
+
+###
+# INFER
+###
+
+
# Fixed: the error description referenced '{device}', which is not a keyword argument of
# this function (only executable_network/inputs), so formatting raised KeyError on error.
@error_handling('processing inference')
def get_infer_results(executable_network, inputs: dict):
    """Run one synchronous inference and return the {output_name: blob} result dict."""
    return executable_network.infer(inputs=inputs)
+
+
# Fixed: the error description referenced '{device}', which is not a keyword argument of
# this function, so the error handler itself raised KeyError when an exception occurred.
@error_handling('getting performance counts from executable network')
def get_perf_counts(executable_network):
    """Return per-layer performance counters from the first infer request."""
    return executable_network.requests[0].get_perf_counts()
+
+
@error_handling('getting inference results for outputs: \'{output}\'')
def infer(net: IENetwork, core: IECore, device: str, inputs: dict, output: list):
    """Load `net` on `device`, run inference on `inputs` and collect requested outputs.

    :param output: list of layer names to collect
    :return: {layer_name: [blob, perf_counters_dict]} for every requested layer that is
             present in the inference results; missing layers are skipped with a warning.
    """
    executable_network = get_exec_net(core=core, net=net, device=device)
    infer_dict = get_infer_results(executable_network=executable_network, inputs=inputs)
    perf_counts = get_perf_counts(executable_network=executable_network)
    no_i = 'no_info'
    no_info_pc = {'cpu_time': no_i, 'exec_time': no_i, 'layer_type': no_i, 'real_time': no_i, 'status': no_i}
    result = {}
    for out in output:
        if out not in infer_dict:
            log.warning("There is no '{}' layer in Inference Engine outputs results".format(out))
            continue
        # Fixed: previously `pc = pc[out]` clobbered the full counters dict, breaking
        # lookups for subsequent outputs. A copy of the placeholder is used so it is
        # never mutated across iterations.
        pc = perf_counts.get(out, dict(no_info_pc))
        pc['device'] = device
        # Fixed: previously `result = {out: ...}` overwrote the dict every iteration,
        # so only the last requested output survived.
        result[out] = [infer_dict[out], pc]
    return result
+
+
# Fixed: the error description referenced '{output}', which is not a keyword argument of
# this function, so formatting the message raised KeyError on the error path.
@error_handling('checking overall accuracy on \'{device}\' and \'{ref_device}\' devices')
def overall_accuracy_check(model: str, ref_model: str, out_layers: list, ref_out_layers: list, inputs: dict,
                           ref_inputs: dict, core: IECore, device: str, ref_core: IECore, ref_device: str, layers: str,
                           num_of_iterations: int):
    """Measure end-to-end inference wall time on both devices over `num_of_iterations` runs.

    Timing is collected only in the default mode (`layers` is None/'None', i.e. output
    layers only); otherwise two empty lists are returned.
    :return: (global_times, ref_global_times) — lists of datetime.timedelta per iteration
    """
    global_times, ref_global_times = [], []
    if layers in ['None', None]:
        net_copy = get_net_copy_with_output(model=model, output=layers, core=core)
        ref_net_copy = get_net_copy_with_output(model=ref_model, output=layers, core=ref_core)
        for i in range(num_of_iterations):
            t1 = datetime.datetime.now()
            infer(net=net_copy, core=core, device=device, inputs=inputs, output=out_layers)
            t2 = datetime.datetime.now()
            infer(net=ref_net_copy, core=ref_core, device=ref_device, inputs=ref_inputs, output=ref_out_layers)
            t3 = datetime.datetime.now()
            # Wall-clock deltas; includes network-load overhead inside infer().
            global_times.append(t2 - t1)
            ref_global_times.append(t3 - t2)
    return global_times, ref_global_times
+
+
def one_ir_mode(args):
    # Cross-check a single IR executed on two devices (-d vs -ref_d): for each
    # requested layer, infer on both devices and compare accuracy and performance.
    core = get_plugin(args.device, args.l, args.config)
    net = get_net(model=args.model, core=core)
    net_layers, net_inputs, net_outputs, precision = get_model_info(net)
    log.info('{}:{} vs {}:{}'.format(args.device, precision, args.reference_device, precision))
    log.info('The same IR on both devices: {}'.format(args.model))
    out_layers = get_layers_list(net_layers, net_inputs, net_outputs, args.layers)
    print_input_layers(net_inputs)
    print_output_layers(out_layers)
    ref_core = get_plugin(args.reference_device, args.l, args.reference_config)
    global_accuracy = []
    inputs = input_processing(model_path=args.model, net_inputs=net_inputs, input_file=args.input)
    # Whole-network timing over --num_of_iterations runs (only collected when no
    # specific layer list was requested — see overall_accuracy_check).
    global_times, ref_global_times = overall_accuracy_check(model=args.model, ref_model=args.model,
                                                            out_layers=out_layers, ref_out_layers=out_layers,
                                                            inputs=inputs, ref_inputs=inputs, core=core,
                                                            device=args.device, ref_core=ref_core,
                                                            ref_device=args.reference_device, layers=args.layers,
                                                            num_of_iterations=args.num_of_iterations)
    for out_layer in out_layers:
        log.info('Layer {} statistics'.format(out_layer))
        # Re-read the IR with this layer marked as an output so its blob can be fetched.
        net_copy = get_net_copy_with_output(model=args.model, output=out_layer, core=core)
        results = infer(net=net_copy, core=core, device=args.device, inputs=inputs, output=[out_layer])
        if out_layer not in results:
            continue
        out_blob, pc = results[out_layer]
        ref_results = infer(net=net_copy, core=ref_core, device=args.reference_device, inputs=inputs, output=[out_layer])
        if out_layer not in ref_results:
            continue
        ref_out_blob, ref_pc = ref_results[out_layer]
        # Per-layer accuracy/performance comparison; accumulate into the global summary.
        a_m = accuracy_metrics(out_blob=out_blob, ref_out_blob=ref_out_blob)
        performance_metrics(pc=pc, ref_pc=ref_pc)
        blob_counters(out_blob=out_blob, ref_out_blob=ref_out_blob)
        global_accuracy = update_global_accuracy_matrics(global_accuracy=global_accuracy, current_accuracy=a_m)
    print_all_over_the_net_metrics(global_times=global_times, ref_global_times=ref_global_times,
                                   global_accuracy=global_accuracy)
+
+
def two_ir_mode(args):
    """Per-layer cross-check of two IRs (--model vs --reference_model) on their devices."""
    core = get_plugin(args.device, args.l, args.config)
    ref_core = get_plugin(args.reference_device, args.l, args.reference_config)
    net = get_net(model=args.model, core=core)
    net_layers, net_inputs, net_outputs, precision = get_model_info(net)
    ref_net = get_net(model=args.reference_model, core=ref_core)
    ref_net_layers, ref_net_inputs, ref_net_outputs, ref_precision = get_model_info(ref_net)
    log.info('{}:{} vs {}:{}'.format(args.device, precision, args.reference_device, ref_precision))
    log.info('IR for {} : {}'.format(args.device, args.model))
    log.info('IR for {} : {}'.format(args.reference_device, args.reference_model))
    out_layers = get_layers_list(net_layers, net_inputs, net_outputs, args.layers)
    ref_out_layers = get_layers_list(ref_net_layers, ref_net_inputs, ref_net_outputs, args.layers)
    print_input_layers(net_inputs)
    print_output_layers(out_layers)
    # Map layer names between the two IRs when MO mapping files are provided.
    layers_map = manage_user_outputs_with_mapping(mapping=args.mapping, reference_mapping=args.reference_mapping,
                                                  user_layers=out_layers)
    inputs = input_processing(model_path=args.model, net_inputs=net_inputs, input_file=args.input,
                              layers_map=layers_map)
    ref_inputs = input_processing(model_path=args.reference_model, net_inputs=ref_net_inputs, input_file=args.input,
                                  layers_map=layers_map)
    global_accuracy = []
    # Fixed: this call previously passed non-existent `plugin=`/`ref_plugin=` keyword
    # arguments and omitted device/ref_device, raising a TypeError before any comparison
    # could run. The keywords now match overall_accuracy_check's signature.
    global_times, ref_global_times = overall_accuracy_check(model=args.model, ref_model=args.reference_model,
                                                            out_layers=out_layers, ref_out_layers=ref_out_layers,
                                                            inputs=inputs, ref_inputs=ref_inputs, core=core,
                                                            device=args.device, ref_core=ref_core,
                                                            ref_device=args.reference_device, layers=args.layers,
                                                            num_of_iterations=args.num_of_iterations)
    for out_layer in layers_map:
        ref_out_layer = layers_map[out_layer]
        if out_layer == ref_out_layer:
            log.info('Layer {} statistics'.format(out_layer))
        else:
            log.info('Statistics \'{}\' vs \'{}\''.format(out_layer, ref_out_layer))
        net_copy = get_net_copy_with_output(model=args.model, output=out_layer, core=core)
        ref_net_copy = get_net_copy_with_output(model=args.reference_model, output=ref_out_layer, core=ref_core)
        results = infer(net=net_copy, core=core, device=args.device, inputs=inputs, output=[out_layer])
        if out_layer not in results:
            continue
        out_blob, pc = results[out_layer]
        ref_results = infer(net=ref_net_copy, core=ref_core, device=args.reference_device, inputs=ref_inputs,
                            output=[ref_out_layer])
        # Fixed: the membership check now precedes the unpacking — previously
        # ref_results[ref_out_layer] was read first, raising KeyError instead of skipping.
        if ref_out_layer not in ref_results:
            continue
        ref_out_blob, ref_pc = ref_results[ref_out_layer]
        a_m = accuracy_metrics(out_blob=out_blob, ref_out_blob=ref_out_blob)
        performance_metrics(pc=pc, ref_pc=ref_pc)
        blob_counters(out_blob=out_blob, ref_out_blob=ref_out_blob)
        global_accuracy = update_global_accuracy_matrics(global_accuracy=global_accuracy, current_accuracy=a_m)
    print_all_over_the_net_metrics(global_times=global_times, ref_global_times=ref_global_times,
                                   global_accuracy=global_accuracy)
+
+
def dump_mode(args):
    # Run the model on one device and dump each requested layer's blob and performance
    # counters into '<model_path>_<device>_dump.npz' for later comparison via --load.
    core = get_plugin(args.device, args.l, args.config)
    net = get_net(model=args.model, core=core)
    out_layers = get_layers_list(net.layers, net.inputs, net.outputs, args.layers)
    inputs = input_processing(args.model, net.inputs, args.input)
    dump_dict = {}
    for out_layer in out_layers:
        log.info('Layer {} processing'.format(out_layer))
        net_copy = get_net_copy_with_output(model=args.model, output=out_layer, core=core)
        results = infer(net=net_copy, core=core, device=args.device, inputs=inputs, output=[out_layer])
        if out_layer not in results:
            continue
        out_blob, pc = results[out_layer]
        # Stored as a 0-d object array so np.savez preserves the {'blob', 'pc'} dict per layer.
        dump_dict[out_layer] = np.array({'blob': out_blob, 'pc': pc})
    dump_output_file(args.model + '_' + args.device + '_dump.npz', dump_dict)
+
+
def load_mode(args):
    # Compare live inference on one device against blobs previously saved by --dump.
    core = get_plugin(args.device, args.l, args.config)
    log.info('IR for {} : {}'.format(args.device, args.model))
    log.info('Loading blob from {}'.format(args.load))
    net = get_net(model=args.model, core=core)
    net_layers, net_inputs, net_outputs, precision = get_model_info(net)
    out_layers = get_layers_list(net_layers, net_inputs, net_outputs, args.layers)
    print_input_layers(net_inputs)
    print_output_layers(out_layers)
    # Optional MO mapping translates layer names between this IR and the dumped one.
    layers_map = manage_user_outputs_with_mapping(mapping=args.mapping, reference_mapping=args.reference_mapping,
                                                  user_layers=out_layers)
    inputs = input_processing(args.model, net_inputs, args.input, layers_map)
    global_accuracy = []
    loaded = load_dump(args.load)
    for out_layer in layers_map:
        ref_out_layer = layers_map[out_layer]
        if out_layer == ref_out_layer:
            log.info('Layer {} statistics'.format(out_layer))
        else:
            log.info('Statistics \'{}\' vs \'{}\''.format(out_layer, ref_out_layer))
        net_copy = get_net_copy_with_output(model=args.model, output=out_layer, core=core)
        results = infer(net=net_copy, core=core, device=args.device, inputs=inputs, output=[out_layer])
        if out_layer not in results:
            continue
        out_blob, pc = results[out_layer]
        # Skip layers that were not present in the dump file.
        if ref_out_layer not in loaded:
            continue
        ref_out_blob = loaded[ref_out_layer]['blob']
        a_m = accuracy_metrics(out_blob=out_blob, ref_out_blob=ref_out_blob)
        # Older dumps may lack performance counters; report them only when present.
        if 'pc' in loaded[ref_out_layer]:
            ref_pc = loaded[ref_out_layer]['pc']
            performance_metrics(pc=pc, ref_pc=ref_pc)
        blob_counters(out_blob=out_blob, ref_out_blob=ref_out_blob)
        global_accuracy = update_global_accuracy_matrics(global_accuracy=global_accuracy, current_accuracy=a_m)
    print_all_over_the_net_metrics(global_accuracy=global_accuracy)
+
+
def main(args):
    """Entry point: report the IE version, set verbosity, and run the selected CCT mode."""
    log.info('Inference Engine:\n API version ............ {}'.format(ie.__version__), extra={'no_lvl': True})
    set_verbosity(args.verbosity)
    mode = find_out_cct_mode(args)
    # Mode ids come from find_out_cct_mode: 1=one IR, 2=two IRs, 3=dump, 4=load.
    dispatch = {
        1: ('Cross check with one IR was enabled', one_ir_mode),
        2: ('Cross check with two IRs was enabled', two_ir_mode),
        3: ('Dump mode was enabled', dump_mode),
        4: ('Load mode was enabled', load_mode),
    }
    if mode in dispatch:
        message, runner = dispatch[mode]
        log.info(message)
        runner(args)
    log.info("Execution successful")
+
+
if __name__ == '__main__':
    # Configure the root logger before parsing so all messages are formatted consistently.
    set_logger(log.DEBUG)
    main(validate_args(build_parser().parse_args()))
--- /dev/null
+numpy
+opencv-python
\ No newline at end of file
--- /dev/null
+# Copyright (C) 2018-2019 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import argparse
+import logging as log
+import os
+import sys
+import traceback
+import xml
+
+try:
+ import cv2
+except Exception as e:
+ log.error("Can not import OpenCV Python package.\nPlease install required python packages by running:\n"
+ "pip3 install -r requirements.txt\n\n Original error message: {}".format(e))
+ sys.exit(1)
+
+try:
+ import numpy as np
+except Exception as e:
+ log.error("Can not import numpy python package.\nPlease install required python packages by running:\n"
+ "pip3 install -r requirements.txt\n\n Original error message: {}".format(e))
+ sys.exit(1)
+
+verbosity = False
+
+
def set_verbosity(flag: bool):
    """Set the module-level verbosity flag (read by error_handling to decide whether to print tracebacks)."""
    global verbosity
    verbosity = flag
+
+
+###
+# USER INTERACTION
+###
+
+
class LvlFormatter(log.Formatter):
    """Log formatter that picks a format string per record level.

    Records carrying a truthy 'no_lvl' extra are emitted as the bare message;
    when the formatter was constructed with lvl == 'DEBUG', every record uses
    the detailed debug format.
    """

    usual = '[ %(levelname)s ] %(msg)s'
    format_dict = {
        'no_lvl': '%(msg)s',
        log.DEBUG: '[ %(asctime)s ] [ %(levelname)s ] [ %(module)s:%(lineno)d ] %(msg)s',
        log.INFO: usual, log.WARNING: usual, log.ERROR: usual, log.CRITICAL: usual
    }

    def __init__(self, lvl, fmt=None):
        log.Formatter.__init__(self, fmt)
        self.lvl = lvl

    def format(self, record: log.LogRecord):
        # Global DEBUG verbosity forces the detailed format for every record.
        chosen = self.format_dict[log.DEBUG] if self.lvl == 'DEBUG' else self.format_dict[record.levelno]
        if record.__dict__.get('no_lvl'):
            chosen = self.format_dict['no_lvl']
        self._style._fmt = chosen
        return log.Formatter.format(self, record)
+
+
def set_logger(lvl: str):
    """Attach a stdout handler using LvlFormatter to the root logger at level `lvl`."""
    root = log.getLogger()
    root.setLevel(lvl)
    stdout_handler = log.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(LvlFormatter(lvl))
    root.addHandler(stdout_handler)
+
+
def error_handling(desc: str):
    """Decorator factory: on exception, log `desc` (formatted with the call's kwargs) and exit(1).

    When the module-level `verbosity` flag is set, the traceback is printed as well.
    :param desc: description template for an error, formatted with the wrapped call's **kwargs
    :return: decorator
    """

    def decorator(func):
        def try_except_func(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                log.error("The following error happened while {}:\n[ {} ] {}".format(
                    desc.format(**kwargs), type(e).__name__, e))
                global verbosity
                if verbosity:
                    traceback.print_tb(tb=e.__traceback__, file=sys.stdout)
                sys.exit(1)

        return try_except_func

    return decorator
+
+
class ExistingFileAction(argparse.Action):
    """argparse action that accepts a path only if it is an existing file; exits otherwise.

    (Note: despite the original docstring, no home-directory expansion or
    relative-to-absolute conversion is performed — the value is stored as given.)
    """

    def __call__(self, parser, namespace, values, option_string=None):
        if values is None:
            return
        if not os.path.isfile(values):
            log.error("File was not found: {}".format(values))
            sys.exit(1)
        setattr(namespace, self.dest, values)
+
+
class ExistingDirAction(argparse.Action):
    """argparse action that accepts a path only if it is an existing directory; exits otherwise.

    (Note: despite the original docstring, no home-directory expansion or
    relative-to-absolute conversion is performed — the value is stored as given.)
    """

    def __call__(self, parser, namespace, values, option_string=None):
        if values is None:
            return
        if not os.path.isdir(values):
            log.error("Directory was not found: {}".format(values))
            sys.exit(1)
        setattr(namespace, self.dest, values)
+
+
def build_parser():
    # Construct the CCT argument parser: a long usage banner describing the four
    # run modes, plus three argument groups (model / plugin / CCT mode).
    parser = argparse.ArgumentParser(
        prog='Cross Check Tool',
        description='Cross Check Tool is a console application that enables comparing accuracy and provides performance'
                    ' metrics',
        usage='\n' + '-' * 62 +
              '\nFor cross precision check provide two IRs \n'
              '(mapping files may be needed) run:'
              '\npython3 cross_check_tool.py \\'
              '\n--input path/to/file/describing/input \\'
              '\n--model path/to/model/*.xml \\'
              '\n--device device_for_model \\'
              '\n--reference_model path/to/reference_model/*.xml \\'
              '\n--reference_device reference_device_for_model \n'
              + '-' * 62 +
              '\nFor cross device check with one precision provide one IR run:'
              '\npython3 cross_check_tool.py \\'
              '\n--input path/to/file/describing/input \\'
              '\n--model path/to/model/*.xml \\'
              '\n--device device_for_model \\'
              '\n--reference_device reference_device_for_model \n'
              + '-' * 62 +
              '\nFor dumping blob and performance counters run:'
              '\npython3 cross_check_tool.py \\'
              '\n--input path/to/file/describing/input \\'
              '\n--model path/to/model/*.xml \\'
              '\n--device device_for_model \\'
              '\n--dump\n'
              + '-' * 62 +
              '\nFor check inference against dumped results run:'
              '\npython3 cross_check_tool.py \\'
              '\n--input path/to/file/describing/input \\'
              '\n--model path/to/model/*.xml \\'
              '\n--device device_for_model \\'
              '\n--load path/to/dump/file/* \n'
              + '-' * 62 +
              '\nFor all layers check provide:\n'
              '--layers=\'all\' \n'
              'For specific number of layers check provide:\n'
              '--layers=\'layer_name,another_layer_name,...,last_layer_name\'\n'
              + '-' * 62 +
              '\nIf --input is empty CCT generates input(s) from normal\n'
              'distribution and dumps this input to a file\n'
              + '-' * 62
    )

    # --- Model-related options (paths validated by ExistingFileAction) ---
    model = parser.add_argument_group('Model specific arguments')
    model.add_argument('--input', '-i', type=str, action=ExistingFileAction,
                       help='Path to an input image file or multi-input file to infer. Generates input(s) from normal '
                            'distribution if empty')
    # model.add_argument('--batch', '-b', type=int, help='Overrides batch size. Default is inherited from model')
    model.add_argument('--model', '-m', type=str, action=ExistingFileAction,
                       help='Path to an .xml file that represents the first IR of the trained model to infer.')
    model.add_argument('--reference_model', '-ref_m', type=str, action=ExistingFileAction,
                       help='Path to an .xml file that represents the second IR to compare the metrics. '
                            'Uses --model if empty')
    model.add_argument('--layers', '-layers', type=str, default=None,
                       help='Defines layers to check. Options: all, None - for output layers check, list of '
                            'comma-separated layer names to check. Default value is None.')
    model.add_argument('--mapping', '-map', type=str, action=ExistingFileAction,
                       help='Model Optimizer provided mapping for --model/-m')
    model.add_argument('--reference_mapping', '-ref_map', type=str, action=ExistingFileAction,
                       help='Model Optimizer provided mapping for --reference_model/-ref_model')

    # --- Plugin/device options; --device is the only required argument ---
    plugin = parser.add_argument_group('Plugin specific arguments')
    plugin.add_argument('--plugin_path', '-pp', type=str, action=ExistingDirAction, help='Path to a plugin folder.')
    plugin.add_argument('--device', '-d', type=str, required=True,
                        help='The first target device to infer the model specified with the -m or --model option. '
                             'CPU, GPU, HDDL or MYRIAD are acceptable.')
    plugin.add_argument('--config', '-conf', type=str, action=ExistingFileAction,
                        help='Path to config file for -d or -device device plugin')
    plugin.add_argument('--reference_device', '-ref_d', type=str,
                        help='The second target device to infer the model and compare the metrics. '
                             'CPU, GPU, HDDL or MYRIAD are acceptable.')
    plugin.add_argument('--reference_config', '-ref_conf', type=str, action=ExistingFileAction,
                        help='Path to config file for -ref_d or -reference_device device plugin')
    plugin.add_argument('-l', type=str, action=ExistingFileAction,
                        help='Required for MKLDNN (CPU)-targeted custom layers. Comma separated paths to a shared'
                             ' libraries with the kernels implementation.')

    # --- Mode selection: --dump and --load are mutually exclusive (checked in validate_args) ---
    modes = parser.add_argument_group('CCT mode arguments')
    # TODO eps? nobody uses it
    modes.add_argument('--dump', help='Enables blobs statistics dumping', action='store_true', default=False)
    modes.add_argument('--load', type=str, action=ExistingFileAction, help='Path to a file to load blobs from')
    # NOTE(review): added to the `model` group after the `modes` group was built —
    # it still renders under 'Model specific arguments' in --help.
    model.add_argument('--num_of_iterations', '-ni', type=int, default=50,
                       help='Number of iterations to collect all over the net performance')
    parser.add_argument('-v', '--verbosity', action='store_true', default=False,
                        help='Increase output verbosity')
    return parser
+
+
@error_handling('validating arguments passed to cross_check_tool.py')
def validate_args(args):
    # Normalize and sanity-check the parsed CLI arguments; raises on combinations
    # that no CCT mode can serve. Returns the (possibly mutated) args namespace.
    # input check
    if args.input is None:
        log.info('No input was provided by --input/-i. Generate input from noise')
    # model check: at least one IR path is required; fall back to the reference model
    if args.model is None and args.reference_model is None:
        raise Exception(
            "Parameters --model/-m and --reference_model/-ref_m are empty. At least one of them is required")
    elif args.model is None and args.reference_model:
        args.model = args.reference_model
    # Identical paths mean a single-IR check, so drop the duplicate reference.
    if args.model == args.reference_model:
        args.reference_model = None
    if args.model != args.reference_model and args.reference_model is not None and args.mapping is None and \
            args.reference_mapping is None:
        log.warning('Check over two different IRs was enabled. In case if layer names in this two IRs differ, '
                    'please provide mapping files with --mapping/-map and --reference_mapping/-ref_map')
    # device check
    if args.device is None and args.reference_device is None:
        raise Exception("Parameters -device/-d and -reference_device/-ref_d are not set. Can not proceed."
                        "\nFor more details use -h option")
    if args.reference_device is None and args.reference_model is None and not args.dump and args.load is None:
        raise Exception("Please provide --reference_model/-ref_m to compare executions on different devices."
                        "\nAnother option is to provide --dump key to dump all execution info on one device."
                        "\nOr provide --load key to compare execution on device with dumped info"
                        "\nFor more details use -h option")
    if args.device is None:
        args.device = args.reference_device
        args.reference_device = None
    # dump and load check
    if args.dump and args.load is not None:
        raise Exception("Cross Check Tool does not support both loading and dumping modes to be enabled. "
                        "Choose one of them and proceed")
    # NOTE(review): the next two conditions rely on `and` binding tighter than `or`,
    # i.e. they read as (model and ref_model and dump) or (device and ref_device and dump).
    if args.model is not None and args.reference_model is not None and args.dump or \
            args.device is not None and args.reference_device is not None and args.dump:
        raise Exception("Cross Check Tool does support dumping mode to be enabled only for one model on one device"
                        "\nFor more details use -h option")
    if args.model is not None and args.reference_model is not None and args.load is not None or \
            args.device is not None and args.reference_device is not None and args.load is not None:
        raise Exception("Cross Check Tool does support loading mode to be enabled for one model on one device against a"
                        " dumped file\nFor more details use -h option")
    return args
+
+
def find_out_cct_mode(args):
    """Resolve which CCT mode the (already validated) arguments select.

    Returns:
        1 -- one IR mode
        2 -- two IRs mode
        3 -- dump mode
        4 -- load mode
    Raises Exception when no mode matches.
    """
    have_model = args.model is not None
    have_device = args.device is not None
    no_reference = args.reference_model is None and args.reference_device is None
    # dump mode
    if args.dump and have_model and have_device and no_reference:
        return 3
    # load mode
    if args.load is not None and have_model and have_device and args.reference_device is None:
        return 4
    # two IR mode
    if have_model and args.reference_model is not None:
        return 2
    # one IR mode
    if have_model and args.reference_model is None:
        return 1
    raise Exception('Unknown Cross Check Tool CLI configuration.\nFor more details use -h option')
+
+
def print_input_layers(inputs: list):
    """Log the detected network input names (singular/plural aware)."""
    noun = 'inputs' if len(inputs) > 1 else 'input'
    log.info('{} {} detected: {}'.format(len(inputs), noun, ', '.join(inputs)))
+
+
def print_output_layers(outputs: list):
    """Log which layers statistics will be dumped for (singular/plural aware)."""
    noun = 'layers' if len(outputs) > 1 else 'layer'
    log.info('Statistics will be dumped for {} {}: {}'.format(len(outputs), noun, ', '.join(outputs)))
+
+
+###
+# PLUGIN
+###
+
+
+@error_handling('parsing config file for plugin: \'{config_file}\'')
+def get_config_dictionary(config_file):
+    """
+    Build a plugin configuration dict, always enabling performance counters.
+
+    Each non-empty line of the config file is expected to be '<KEY> <VALUE>':
+    the first whitespace-separated token is the key, the remainder (stripped)
+    is the value. Returns the defaults when no config file is given.
+    """
+    config = {'PERF_COUNT': 'YES'}
+    if not config_file:
+        return config
+    with open(config_file) as f:
+        # BUGFIX(review): original code split each line by the config file *path*
+        # (config_line.split(config_file)), which never matches the line content,
+        # so the whole line became the key and the value was always empty.
+        # It also processed only the first line of the file.
+        for config_line in f:
+            if not config_line.strip():
+                continue
+            key = config_line.split()[0]
+            value = config_line[len(key):].strip()
+            config[key] = value
+    return config
+
+
+###
+# INPUTS
+###
+
+
+def read_multi_input_file(input_file: str, net_inputs: dict):
+    """
+    Load input blobs for every network input from a multi-input .npz file.
+
+    Each network input must be present in the file as an entry whose item(0)
+    dict contains a 'blob' key; the blob is reshaped to the network input shape.
+    Raises Exception if an input is missing, has no 'blob', or cannot be reshaped.
+    """
+    npz = np.load(input_file, allow_pickle=True)
+    files = npz.files
+    dump = {}
+    for net_input in net_inputs:
+        if net_input not in files:
+            raise Exception('Can not find input data for input {} in multi-input file {}.\n'
+                            'Input data was provided for layers: {}\n'
+                            'Network inputs: {}'.format(net_input, input_file, ', '.join(files),
+                                                        ', '.join(net_inputs.keys())))
+        if 'blob' in npz[net_input].item(0):
+            just_blob = npz[net_input].item(0)['blob']
+            network_shape = net_inputs[net_input].shape
+            log.info('Layer {} shape = {}, input blob from multi-input file shape = {}'
+                     ''.format(net_input, network_shape, just_blob.shape))
+            try:
+                reshaped_blob = np.reshape(just_blob, network_shape)
+            except ValueError:
+                # np.reshape raises ValueError on incompatible shapes; the
+                # original bare 'except' also swallowed KeyboardInterrupt/SystemExit
+                raise Exception('Can not reshape input blob from multi-input file for layer {} to shape {}'
+                                ''.format(net_input, network_shape))
+            dump[net_input] = reshaped_blob
+        else:
+            raise Exception(
+                'Can not find \'blob\' parameter for input {} in input file {}'.format(net_input, input_file))
+    return dump
+
+
+@error_handling('reading --input/-i by OpenCV python module. OpenCV version: {}. '
+                'It may happen due to wrong input image format'.format(cv2.__version__))
+def read_image_file(input_file: str, net_inputs: dict):
+    """
+    Read a single image file and prepare it as the input blob of a single-input network.
+
+    Multi-input topologies are rejected here; they must provide a multi-input file.
+    """
+    inputs = dict()
+    if len(net_inputs) == 1:
+        image = cv2.imread(input_file)
+        if image is None:
+            raise Exception('Can not read input image ' + input_file)
+        only_layer_name = list(net_inputs.keys())[0]
+        shape = net_inputs[only_layer_name].shape
+        # assumes the input layer has a 4D NCHW shape -- TODO confirm for other layouts
+        if len(shape) != 4:
+            raise Exception('Can not interpret input shape as image')
+        n, c, h, w = shape
+        image = cv2.resize(image, (w, h))
+        image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
+        image = image.reshape((n, c, h, w))
+        inputs[only_layer_name] = image
+    else:
+        raise Exception('Multi-input topology detected. Please provide multi-input file to --input key')
+    return inputs
+
+
+def input_processing(model_path: str, net_inputs: dict, input_file: str, layers_map: dict = None):
+    """
+    Produce input blobs for the network.
+
+    If no input file is given, random data clipped to [0, 1] is generated and
+    also dumped next to the model so the run can be reproduced. Otherwise the
+    file is first interpreted as a multi-input .npz; on any failure it falls
+    back to reading the file as a single image.
+    """
+    inputs = dict()
+    if input_file is None:
+        for net_input in net_inputs:
+            inputs[net_input] = np.clip(np.random.normal(0.5, 0.1, size=net_inputs[net_input].shape), 0, 1)
+        dump_output_file(model_path + '_random_input_dump.npz', {inp: {'blob': inputs[inp]} for inp in inputs})
+        return inputs
+    try:
+        inputs = read_multi_input_file(input_file=input_file, net_inputs=net_inputs)
+    except Exception:
+        # Deliberate best-effort fallback: anything that is not a valid
+        # multi-input file is retried as an image. The original bare 'except'
+        # also caught KeyboardInterrupt/SystemExit; narrowed to Exception.
+        inputs = read_image_file(input_file=input_file, net_inputs=net_inputs)
+    return inputs
+
+
+def accuracy_metrics(out_blob, ref_out_blob):
+    """
+    Compute, log and return accuracy metrics between the actual and reference blobs.
+
+    Returns a dict {metric_name: value}. Raises Exception when blob sizes differ.
+    """
+    if out_blob.size != ref_out_blob.size:
+        raise Exception('Different number of elements in blobs {} and {}. Can not compare'
+                        ''.format(out_blob.size, ref_out_blob.size))
+    abs_diff = np.absolute(out_blob - ref_out_blob)
+    # NOTE(review): 'relative' difference here is taken w.r.t. the minimum
+    # absolute difference (floored at 1e-20), not w.r.t. the reference values --
+    # confirm this is the intended definition
+    rel_diff = np.divide(abs_diff, np.min(abs_diff) if np.min(abs_diff) != 0 else 1e-20)
+
+    metrics = [
+        ('Max absolute difference', np.max(abs_diff)),
+        ('Min absolute difference', np.min(abs_diff)),
+        ('Max relative difference', np.max(rel_diff)),
+        ('Min relative difference', np.min(rel_diff)),
+        ('Min reference value', np.min(ref_out_blob)),
+        ('Min absolute reference value', np.min(np.abs(ref_out_blob))),
+        ('Max reference value', np.max(ref_out_blob)),
+        ('Max absolute reference value', np.max(np.abs(ref_out_blob))),
+        ('Min actual value', np.min(out_blob)),
+        ('Min absolute actual value', np.min(np.abs(out_blob))),
+        ('Max actual value', np.max(out_blob)),
+        ('Max absolute actual value', np.max(np.abs(out_blob)))
+    ]
+
+    for key, value in metrics:
+        # scientific notation only for values whose string form is long
+        if len(str(value)) > 5:
+            log.info('{:>35} : {:.5E}'.format(key, value), extra={'no_lvl': True})
+        else:
+            log.info('{:>35} : {}'.format(key, value), extra={'no_lvl': True})
+    return {metric: value for metric, value in metrics}
+
+
+def performance_metrics(pc, ref_pc):
+    """Log side-by-side performance counters for the actual and reference runs."""
+    rows = (
+        ('Device', '-d ' + pc['device'], '-ref_d ' + ref_pc['device']),
+        ('Status', pc['status'], ref_pc['status']),
+        ('Layer type', pc['layer_type'], ref_pc['layer_type']),
+        ('Real time, microsec', pc['real_time'], ref_pc['real_time']),
+    )
+    for metric, actual, reference in rows:
+        log.info('{:>35}: {:>16} {:>16}'.format(metric, actual, reference), extra={'no_lvl': True})
+
+
+def blob_counters(out_blob, ref_out_blob):
+    """Log NaN / Inf / zero element counts for the actual and reference blobs."""
+    def count_zeros(blob):
+        # number of exactly-zero elements
+        return blob.size - np.count_nonzero(blob)
+
+    counters = (
+        ('Number of NAN', np.sum(np.isnan(out_blob)), np.sum(np.isnan(ref_out_blob))),
+        ('Number of INF', np.sum(np.isinf(out_blob)), np.sum(np.isinf(ref_out_blob))),
+        ('Number of ZERO', count_zeros(out_blob), count_zeros(ref_out_blob)),
+    )
+    for metric, actual, reference in counters:
+        log.info('{:>35}: {:>16} {:>16}'.format(metric, actual, reference), extra={'no_lvl': True})
+
+
+def update_global_accuracy_matrics(global_accuracy: list, current_accuracy: dict):
+    """
+    Fold one layer's accuracy metrics into the running whole-network metrics.
+
+    (Function name kept as-is for caller compatibility; 'matrics' is historical.)
+    """
+    metrics = [
+        ('Max absolute difference', max),
+        ('Min absolute difference', min),
+        ('Max relative difference', max),
+        ('Min relative difference', min)]
+    for metric, combine in metrics:
+        existing = [entry for entry in global_accuracy if entry[0] == metric]
+        if len(existing) == 1:
+            _, previous_value = existing[0]
+            global_accuracy.remove(existing[0])
+            global_accuracy.append((metric, combine(previous_value, current_accuracy[metric])))
+        else:
+            global_accuracy.append((metric, current_accuracy[metric]))
+    return global_accuracy
+
+
+def print_all_over_the_net_metrics(global_accuracy: list, global_times: list = None,
+                                   ref_global_times: list = None):
+    """
+    Log the overall (whole-network) metrics accumulated across all layers.
+
+    When both timing lists are non-empty, the middle element of each list of
+    timedeltas is reported as overall performance (the median, assuming the
+    lists are sorted -- TODO confirm at call sites).
+    """
+    if global_times is not None and ref_global_times is not None and len(global_times) and len(ref_global_times):
+        log.info('-' * 70, extra={'no_lvl': True})
+        log.info('{:>35}: {:>16,.5E} {:>16,.5E}'.format(
+            'Overall performance, microseconds', global_times[len(global_times) // 2].microseconds,
+            ref_global_times[len(ref_global_times) // 2].microseconds), extra={'no_lvl': True})
+        log.info('-' * 70, extra={'no_lvl': True})
+    for metric, value in global_accuracy:
+        log.info('{} {} = {}'.format('Overall', metric.lower(), value))
+
+
+###
+# MAPPING
+###
+
+
+def read_mapping(file_name: str):
+    """
+    Parse a framework-to-IR mapping XML file.
+
+    Returns {framework_name: (ir_layer_name, ir_layer_id)} where framework_name
+    is '<name>:<out_port_id>'. Entries lacking either a <framework> or an <IR>
+    child element are skipped.
+    """
+    mapping_dict = {}
+    xml_tree = xml.etree.ElementTree.parse(file_name)
+    xml_root = xml_tree.getroot()
+    for child in xml_root:
+        fw_info = child.find('.//framework')
+        ir_info = child.find('.//IR')
+        if fw_info is None or ir_info is None:
+            continue
+        framework_name = fw_info.attrib['name'] + ':' + fw_info.attrib['out_port_id']
+        # ir_info is guaranteed non-None after the guard above, so the original
+        # 'if ir_info is not None else None' conditionals were dead code
+        mapping_dict[framework_name] = (ir_info.attrib['name'], int(ir_info.attrib['id']))
+    return mapping_dict
+
+
+def map_layers(mapping_file: str = None, ref_mapping_file: str = None):
+    """
+    Intersect two mapping files into a {layer: layer} dict of names present in both.
+
+    NOTE(review): implicitly returns None when either file is not provided --
+    callers are expected to guard against that.
+    """
+    if mapping_file is not None and ref_mapping_file is not None:
+        mapping = read_mapping(mapping_file)
+        ref_mapping = read_mapping(ref_mapping_file)
+        # keys common to both mappings map to themselves
+        mapping = {layer: ref_layer for layer in mapping for ref_layer in ref_mapping if layer == ref_layer}
+        return mapping
+
+
+def manage_user_outputs_with_mapping(mapping, reference_mapping, user_layers):
+    """
+    Build the {layer: reference_layer} map restricted to user-requested layers.
+
+    Falls back to an identity map when no mapping files are given; warns about
+    requested layers that cannot be mapped or found.
+    """
+    if mapping is not None and reference_mapping is not None:
+        layers_map = map_layers(mapping, reference_mapping)
+    else:
+        layers_map = {layer: layer for layer in user_layers}
+    for layer in user_layers:
+        if layer not in layers_map:
+            if mapping is not None and reference_mapping is not None:
+                log.warning(
+                    'Can not map layer {} from --model/-m to any layer from --reference_model/-ref_m'.format(layer))
+            else:
+                log.warning('Can not find layer {} in --reference_model/-ref_m model'.format(layer))
+    # BUGFIX(review): iterate over a snapshot of the keys -- deleting from a dict
+    # while iterating it directly raises 'dictionary changed size during
+    # iteration' in Python 3
+    for layer in list(layers_map):
+        if layer not in user_layers:
+            del layers_map[layer]
+    return layers_map
+
+
+def get_layers_list(all_layers: dict, inputs: dict, outputs: list, layers: str):
+    """
+    Resolve the user's layer selection string to the layers to check.
+
+    'all'      -> every non-Const layer of the model (returned as a dict);
+    'a,b,...'  -> the named layers, validated to exist and not be inputs;
+    None       -> default to the network outputs.
+    """
+    if layers is not None and layers != 'None':
+        if layers == 'all':
+            return {name: layer for name, layer in all_layers.items() if layer.type not in ['Const']}
+        else:
+            user_layers = [layer.strip() for layer in layers.split(',')]
+            layers_to_check = []
+            for user_layer in user_layers:
+                if user_layer not in all_layers:
+                    raise Exception("Layer {} doesn't exist in the model".format(user_layer))
+                if user_layer in inputs:
+                    raise Exception("Layer {} is input layer. Can not proceed".format(user_layer))
+                layers_to_check.append(user_layer)
+            return layers_to_check
+    else:
+        return outputs
+
+
+###
+# FILES
+###
+
+def dump_output_file(output_file, dump_dict):
+    """Save the dump dict to a compressed .npz file and log its path."""
+    np.savez_compressed(output_file, **dump_dict)
+    log.info('Dump file path: {}'.format(output_file))
+
+
+def load_dump(file_to_load: str):
+    """Load a previously dumped .npz file back into a {name: item_dict} mapping."""
+    npz = np.load(file_to_load, allow_pickle=True)
+    dump = {file: npz[file].item(0) for file in npz}
+    return dump
--- /dev/null
+benchmark_tool/benchmark_app.py
+benchmark_tool/requirements.txt
+benchmark_tool/README.md
+cross_check_tool/__init__.py
+cross_check_tool/utils.py
+cross_check_tool/requirements.txt
+cross_check_tool/README.md
+cross_check_tool/cross_check_tool.py
\ No newline at end of file
extensions/back/blob_normalizer.py
extensions/back/compress_quantized_weights.py
extensions/back/ConvolutionNormalizer.py
+extensions/back/CorrectName.py
extensions/back/CropToStridedSlice.py
extensions/back/CutMemory.py
extensions/back/disable_unsupported_ND_operations.py
extensions/front/flatten_to_reshape.py
extensions/front/freeze_placeholder_value.py
extensions/front/GeLUMerger_Erf.py
+extensions/front/GeLUMerger_Tanh.py
extensions/front/global_pooling_to_reduce.py
extensions/front/image_scaler.py
extensions/front/input_cut.py
extensions/front/onnx/shape_ext.py
extensions/front/onnx/slice_ext.py
extensions/front/onnx/softmax_ext.py
+extensions/front/onnx/softmaxONNX_to_softmax.py
extensions/front/onnx/split_ext.py
extensions/front/onnx/squeeze_ext.py
extensions/front/onnx/top_k_ext.py
extensions/middle/ScaleInput.py
extensions/middle/SharedWeightsDuplication.py
extensions/middle/SliceConverter.py
+extensions/middle/SliceLikeToStridedSlice.py
extensions/middle/space_to_depth.py
extensions/middle/sparse_reshape.py
extensions/middle/ssd_anchors_to_const.py
extensions/middle/UpsampleToResample.py
extensions/middle/UselessMerge.py
extensions/middle/UselessSplitEraser.py
-extensions/middle/UselessStridedSlice.py
extensions/middle/wights_permute_normalizer.py
extensions/ops/__init__.py
extensions/ops/accum.py
extensions/ops/embedding_bag.py
extensions/ops/Enter.py
extensions/ops/Exit.py
-extensions/ops/exp.py
extensions/ops/fakequantize.py
extensions/ops/gather.py
extensions/ops/GatherNd.py
extensions/ops/instance_normalization.py
extensions/ops/interp.py
extensions/ops/interpolate.py
-extensions/ops/Log.py
extensions/ops/LSTM.py
extensions/ops/lstm_cell.py
extensions/ops/lstm_sequence.py
extensions/ops/shufflechannel.py
extensions/ops/simplernms.py
extensions/ops/size.py
+extensions/ops/slice_like.py
extensions/ops/space_to_depth.py
extensions/ops/sparse_fill_empty_rows.py
extensions/ops/sparse_reshape.py
install_prerequisites/install_prerequisites_onnx.sh
install_prerequisites/install_prerequisites_tf.bat
install_prerequisites/install_prerequisites_tf.sh
+install_prerequisites/protobuf-3.6.1-py3.5-win-amd64.egg
+install_prerequisites/protobuf-3.6.1-py3.6-win-amd64.egg
+install_prerequisites/protobuf-3.6.1-py3.7-win-amd64.egg
mo.py
mo/__init__.py
mo/back/__init__.py
mo/pipeline/common.py
mo/pipeline/unified.py
mo/utils/__init__.py
+mo/utils/broadcasting.py
mo/utils/class_registration.py
mo/utils/cli_parser.py
mo/utils/custom_replacement_config.py
mo/utils/dsu.py
mo/utils/error.py
mo/utils/find_inputs.py
+mo/utils/get_ov_update_message.py
mo/utils/graph.py
mo/utils/guess_framework.py
mo/utils/import_extensions.py
mo/utils/ir_reader/extenders/GRUCell_extender.py
mo/utils/ir_reader/extenders/interpolate_extender.py
mo/utils/ir_reader/extenders/LSTMCell_extender.py
+mo/utils/ir_reader/extenders/non_max_suppression_extender.py
mo/utils/ir_reader/extenders/non_zero_extender.py
mo/utils/ir_reader/extenders/pad_extender.py
mo/utils/ir_reader/extenders/parameter_extender.py
mo/utils/ir_reader/extenders/priorbox_extender.py
mo/utils/ir_reader/extenders/reorg_yolo_extender.py
mo/utils/ir_reader/extenders/RNNCell_extender.py
+mo/utils/ir_reader/extenders/shape_of_extender.py
mo/utils/ir_reader/extenders/strided_slice_extender.py
mo/utils/ir_reader/extenders/tensoriterator_extender.py
mo/utils/ir_reader/extenders/topk_extender.py
--- /dev/null
+"""
+ Copyright (C) 2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.graph.graph import Graph
+from mo.back.replacement import BackReplacementPattern
+
+
+class RestoreOriginalFrameworkName(BackReplacementPattern):
+    """
+    This transformation corrects names of layers to their framework names.
+    To perform this correction, framework layer name should be in the attribute 'framework_node_name'.
+    In some cases, renaming is necessary only if some condition is fulfilled. Such condition should be a some
+    function in the attribute 'rename_condition'.
+
+    For example, in the transformation SoftmaxONNXFrontReplacer such condition is
+    lambda n: len(n.graph.get_op_nodes(name=node_name)) == 0
+    """
+
+    enabled = True
+
+    def find_and_replace_pattern(self, graph: Graph):
+        for node in graph.get_op_nodes():
+            if not node.has_valid('framework_node_name'):
+                continue
+
+            if node.has_valid('rename_condition'):
+                # conditional rename: evaluate the predicate on the node, then
+                # drop the predicate so it does not leak into the IR attributes
+                need_renaming = node['rename_condition'](node)
+                del node['rename_condition']
+                if need_renaming:
+                    node.name = node['framework_node_name']
+            else:
+                # no condition attached -- rename unconditionally
+                node.name = node['framework_node_name']
+
+            # the helper attribute is always removed, whether or not a rename happened
+            del node['framework_node_name']
from extensions.back.ElementwiseOpsToEltwiseOps import SimpleEltwiseToEltwiseOp
from extensions.back.insert_compatibility_l2normalization import CompatibilityL2NormalizationPattern
from extensions.ops.elementwise import Mul
+from extensions.ops.normalize_l2 import NormalizeL2Op
from mo.back.replacement import BackReplacementPattern
+from mo.front.common.partial_infer.utils import int64_array
+from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Graph, rename_node
-from mo.ops.const import Const
class NormalizeToNormalizeL2(BackReplacementPattern):
mul = Mul(graph, {'name': output_name}).create_node()
rename_node(mul, output_name)
- node.out_port(0).get_connection().set_source(mul.out_port(0))
- node.out_port(0).connect(mul.in_port(0))
- node.in_port(1).get_connection().get_source().connect(mul.in_port(1))
- node.in_port(1).disconnect()
-
- node['type'] = 'NormalizeL2'
- node['eps_mode'] = 'add'
- node['force_precision_in_ports'] = {1: 'int64'}
+ if not node.across_spatial:
+ axes = int64_array([1])
+ else:
+ axes = int64_array(np.arange(start=1, stop=node.in_port(0).data.get_shape().size))
- axes_val = np.array([1]) if not node.across_spatial else \
- np.arange(start=1, stop=node.in_port(0).data.get_shape().size)
- axes = Const(graph, {'value': axes_val}).create_node()
- node.in_port(1).connect(axes.out_port(0))
+ normalizel2 = create_op_with_const_inputs(graph, NormalizeL2Op, {1: axes}, {'eps_mode': 'add', 'eps': node.eps})
- del node['across_spatial']
- del node['channel_shared']
+ node.out_port(0).get_connection().set_source(mul.out_port(0))
+ node.in_port(1).get_connection().get_source().connect(mul.in_port(1))
+ normalizel2.out_port(0).connect(mul.in_port(0))
+ node.in_port(0).get_connection().set_destination(normalizel2.in_port(0))
class ChangeCastOutputType(FrontReplacementSubgraph):
"""
- Change the Cast to int64 to int32 since not all plugins support int64 data type.
Change the Cast to fp64 to fp32 since not all plugins support fp64 data type.
Change the Cast to fp32 to fp16 when generating IR for fp16.
"""
def replace_sub_graph(self, graph: Graph, match: [dict, SubgraphMatch]):
node = match['cast']
- if node.dst_type == np.int64:
- log.warning('Change data type from {} to {} for node {}'.format(node.dst_type, np.int32, node.name))
- node.dst_type = np.int32
-
if node.dst_type == np.float64:
log.warning('Change data type from {} to {} for node {}'.format(node.dst_type, np.float32, node.name))
node.dst_type = np.float32
"""
import logging as log
+
from math import sqrt, fabs
from extensions.ops.gelu import GeLUOP
('div', dict(op='Div')),
('erf', dict(op='Erf')),
('add', dict(op='Add')),
+ ('mul_param', dict(op='Const')),
+ ('div_param', dict(op='Const')),
+ ('add_param', dict(op='Const')),
+
],
edges=[
('mul', 'mul0'),
('div', 'erf'),
('erf', 'add'),
- ('add', 'mul0')
+ ('add', 'mul0'),
+ ('mul_param', 'mul'),
+ ('div_param', 'div'),
+ ('add_param', 'add'),
])
def replace_sub_graph(self, graph: Graph, match: dict):
# Gaussian Error Linear Unit
# f(x) = 0.5 * x * (1 + erf(x / sqrt(2))
- add = match['add']
- mul = match['mul']
div = match['div']
inp_port = div.in_port(0).get_source()
inp = inp_port.node
log.debug('Found potential Erf-based GeLU pattern after {} with name {}'.format(inp.op, inp.name))
# take the values of the mul, add and div
- div_param = None
- mul_param = None
- add_param = None
- if div.in_port(0).get_source().node.soft_get('type') == 'Const':
- div_param = div.in_port(0).get_source().node.value
- elif div.in_port(1).get_source().node.soft_get('type') == 'Const':
- div_param = div.in_port(1).get_source().node.value
-
- if mul.in_port(0).get_source().node.soft_get('type') == 'Const':
- mul_param = mul.in_port(0).get_source().node.value
- elif mul.in_port(1).get_source().node.soft_get('type') == 'Const':
- mul_param = mul.in_port(1).get_source().node.value
-
- if add.in_port(0).get_source().node.soft_get('type') == 'Const':
- add_param = add.in_port(0).get_source().node.value
- elif add.in_port(1).get_source().node.soft_get('type') == 'Const':
- add_param = add.in_port(1).get_source().node.value
+ div_param = match['div_param']
+ add_param = match['add_param']
+ mul_param = match['mul_param']
- if mul_param is None or div_param is None or add_param is None:
- return
+ if add_param.value.size == 1 and mul_param.value.size == 1 and div_param.value.size == 1:
+ mul_param = match['mul_param'].value.item()
+ add_param = match['add_param'].value.item()
+ div_param = match['div_param'].value.item()
- sqrt2 = sqrt(2.0)
- # check that the values match the approximation
- if fabs(div_param - sqrt2) < 1e-06 and mul_param == 0.5 and add_param == 1.0:
- log.debug('Confirmed Erf-based GELU pattern after {} with name {}'.format(inp.op, inp.name))
- gelu = GeLUOP(graph, dict(name=inp.name + '/GELU_')).create_node()
- inp_port.connect(gelu.in_port(0))
- match['mul0'].out_port(0).get_connection().set_source(gelu.out_port(0))
+ sqrt2 = sqrt(2.0)
+ # check that the values match the approximation
+ if fabs(div_param - sqrt2) < 1e-06 and mul_param == 0.5 and add_param == 1.0:
+ log.debug('Confirmed Erf-based GELU pattern after {} with name {}'.format(inp.op, inp.name))
+ gelu = GeLUOP(graph, dict(name=inp.name + '/GELU_')).create_node()
+ inp_port.connect(gelu.in_port(0))
+ match['mul0'].out_port(0).get_connection().set_source(gelu.out_port(0))
--- /dev/null
+"""
+ Copyright (C) 2017-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+from math import sqrt, fabs, pi
+
+from extensions.ops.gelu import GeLUOP
+from mo.front.common.replacement import FrontReplacementSubgraph
+from mo.graph.graph import Graph
+
+
+class GeLUMergerTanh(FrontReplacementSubgraph):
+    """Fuses the TanH-based GELU approximation subgraph into a single GeLU op."""
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['cmd_params'].generate_experimental_IR_V10]
+
+    def pattern(self):
+        log.info('Enabled GeLU Merger for approximation with TanH')
+        return dict(
+            nodes=[
+                ('pow', dict(op='Pow')),
+                ('mul', dict(op='Mul')),
+                ('mul0', dict(op='Mul')),
+                ('mul1', dict(op='Mul')),
+                ('mul2', dict(op='Mul')),
+                ('tanh', dict(op='Tanh')),
+                ('add', dict(op='Add')),
+                ('add0', dict(op='Add')),
+                ('mul_param', dict(op='Const')),
+                ('mul0_param', dict(op='Const')),
+                ('mul1_param', dict(op='Const')),
+            ],
+            edges=[
+                ('pow', 'mul'),
+                ('mul', 'add'),
+                ('add', 'mul0'),
+                ('mul0', 'tanh'),
+                ('tanh', 'add0'),
+                ('add0', 'mul1'),
+                ('mul1', 'mul2'),
+                ('mul_param', 'mul'),
+                ('mul0_param', 'mul0'),
+                ('mul1_param', 'mul1'),
+            ])
+
+    def replace_sub_graph(self, graph: Graph, match: dict):
+        # Gaussian Error Linear Unit, TanH based approximation:
+        # f(x) = 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))
+        inp_port = match['pow'].in_port(0).get_source()
+        inp = inp_port.node
+        log.debug('Found potential TanH-based GeLU pattern after {} with name {}'.format(inp.op, inp.name))
+
+        # take the values of the mul ops
+        mul_param = match['mul_param']
+        mul0_param = match['mul0_param']
+        mul1_param = match['mul1_param']
+        # only scalar constants can correspond to the approximation coefficients
+        if mul0_param.value.size == 1 and mul_param.value.size == 1 and mul1_param.value.size == 1:
+            mul_param = match['mul_param'].value.item()
+            mul0_param = match['mul0_param'].value.item()
+            mul1_param = match['mul1_param'].value.item()
+            sqrt2pi = sqrt(2.0/pi)
+            # check that the values match the approximation
+            if fabs(mul0_param - sqrt2pi) < 1e-06 and fabs(mul_param - 0.044715) < 1e-06 and mul1_param == 0.5:
+                log.debug('Confirmed TanH-based GELU pattern after {} with name {}'.format(inp.op, inp.name))
+                gelu = GeLUOP(graph, dict(name=inp.name + '/GELU_')).create_node()
+                inp_port.connect(gelu.in_port(0))
+                match['mul2'].out_port(0).get_connection().set_source(gelu.out_port(0))
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+from math import sqrt
+
+from extensions.front.GeLUMerger_Erf import GeLUMergerErf
+from extensions.front.GeLUMerger_Tanh import GeLUMergerTanh
+from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
+
+nodes_attributes_erf = {
+ 'inp': {'kind': 'op', 'op': 'AnyOp'},
+ 'mul': {'kind': 'op', 'op': 'Mul'},
+ 'mul0': {'kind': 'op', 'op': 'Mul'},
+ 'div': {'kind': 'op', 'op': 'Div'},
+ 'erf': {'kind': 'op', 'op': 'Erf'},
+ 'add': {'kind': 'op', 'op': 'Add'},
+ 'mul_param': {'kind': 'op', 'type': 'Const', 'op': 'Const'},
+ 'div_param': {'kind': 'op', 'type': 'Const', 'op': 'Const'},
+ 'add_param': {'kind': 'op', 'type': 'Const', 'op': 'Const'},
+ 'out': {'kind': 'op', 'op': 'AnyOp'},
+}
+
+nodes_attributes_tanh = {
+ 'inp': {'kind': 'op', 'op': 'AnyOp'},
+ 'pow': {'kind': 'op', 'op': 'Pow'},
+ 'mul': {'kind': 'op', 'op': 'Mul'},
+ 'mul0': {'kind': 'op', 'op': 'Mul'},
+ 'mul1': {'kind': 'op', 'op': 'Mul'},
+ 'mul2': {'kind': 'op', 'op': 'Mul'},
+ 'tanh': {'kind': 'op', 'op': 'Tanh'},
+ 'add': {'kind': 'op', 'op': 'Add'},
+ 'add0': {'kind': 'op', 'op': 'Add'},
+ 'mul_param': {'kind': 'op', 'type': 'Const', 'op': 'Const'},
+ 'mul0_param': {'kind': 'op', 'type': 'Const', 'op': 'Const'},
+ 'mul1_param': {'kind': 'op', 'type': 'Const', 'op': 'Const'},
+ 'out': {'kind': 'op', 'op': 'AnyOp'},
+}
+
+nodes_attributes_ref = {
+ 'inp': {'kind': 'op', 'op': 'AnyOp'},
+ 'gelu': {'kind': 'op', 'op': 'Gelu'},
+ 'out': {'kind': 'op', 'op': 'AnyOp'},
+}
+
+class TestGeLUMergerReplacement(unittest.TestCase):
+    """Checks that the Erf- and TanH-based GELU subgraphs are fused into a single Gelu op."""
+
+    def test_GeLUMergerErf_test_1(self):
+        """Erf approximation: 0.5 * x * (1 + erf(x / sqrt(2))) collapses to Gelu."""
+        graph = build_graph(nodes_attributes_erf,
+                            [('inp', 'mul0', {'out': 0}),
+                             ('inp', 'div', {'out': 0}),
+                             ('mul', 'mul0'),
+                             ('div', 'erf'),
+                             ('erf', 'add'),
+                             ('add', 'mul0'),
+                             ('mul_param', 'mul'),
+                             ('div_param', 'div'),
+                             ('add_param', 'add'),
+                             ('mul0', 'out'),
+                             ],
+                            {'mul_param': {'shape': np.array([1]), 'value': np.array(0.5)},
+                             'add_param': {'shape': np.array([1]), 'value': np.array(1.0)},
+                             'div_param': {'shape': np.array([1]), 'value': np.array(sqrt(2.0))}
+                             },
+                            nodes_with_edges_only=True)
+        graph_ref = build_graph(nodes_attributes_ref,
+                                [('inp', 'gelu'),
+                                 ('gelu', 'out')],
+                                {}, nodes_with_edges_only=True)
+        graph.stage = 'front'
+
+        replacer = GeLUMergerErf()
+        replacer.find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'out', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_GeLUMergerTanh_test_2(self):
+        """TanH approximation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))) collapses to Gelu."""
+        graph = build_graph(nodes_attributes_tanh,
+                            [('inp', 'mul2', {'out': 0}),
+                             ('inp', 'add', {'out': 0}),
+                             ('inp', 'pow', {'out': 0}),
+                             ('pow', 'mul'),
+                             ('mul', 'add'),
+                             ('add', 'mul0'),
+                             ('mul0', 'tanh'),
+                             ('tanh', 'add0'),
+                             ('add0', 'mul1'),
+                             ('mul1', 'mul2'),
+                             ('mul_param', 'mul'),
+                             ('mul0_param', 'mul0'),
+                             ('mul1_param', 'mul1'),
+                             ('mul2', 'out'),
+                             ],
+                            {'mul0_param': {'shape': np.array([1]), 'value': np.array(sqrt(2.0/3.1415926))},
+                             'mul1_param': {'shape': np.array([1]), 'value': np.array(0.5)},
+                             'mul_param': {'shape': np.array([1]), 'value': np.array(0.044715)}
+                             },
+                            nodes_with_edges_only=True)
+        graph_ref = build_graph(nodes_attributes_ref,
+                                [('inp', 'gelu'),
+                                 ('gelu', 'out')],
+                                {}, nodes_with_edges_only=True)
+        graph.stage = 'front'
+
+        replacer = GeLUMergerTanh()
+        replacer.find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'out', check_op_attrs=True)
+        self.assertTrue(flag, resp)
\ No newline at end of file
"""
import numpy as np
-from extensions.ops.Log import LogOp
+from extensions.ops.activation_ops import Log
from extensions.ops.elementwise import Add
from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Graph, Node
+from mo.graph.graph import Graph, Node, rename_nodes
from mo.ops.const import Const
enabled = True
def replace_op(self, graph: Graph, node: Node):
+ node_name = node.soft_get('name', node.id)
const_dtype = np.float32
if node.has_valid('data_type'):
const_dtype = node.data_type
const = Const(graph, {'value': np.array([1], dtype=const_dtype)}).create_node()
add = Add(graph, {'name': node.name + '/Add_'}).create_node()
- log = LogOp(graph, {'name': node.name + '/Log_'}).create_node()
+ log = Log(graph, {'name': node.name + '/Log_'}).create_node()
# Connect nodes: input -> Add -> Log
const.out_port(0).connect(add.in_port(0))
node.in_port(0).get_connection().set_destination(add.in_port(1))
add.out_port(0).connect(log.in_port(0))
+ rename_nodes([(node, node_name + '/delete'), (log, node_name)])
# The "explicit" version of the return value is: [(out_node.id, 0)])
return [log.id]
-
See the License for the specific language governing permissions and
limitations under the License.
"""
-from extensions.ops.Log import LogOp
from extensions.ops.ReduceOps import ReduceMax, ReduceSum
+from extensions.ops.activation_ops import Exp, Log
from extensions.ops.elementwise import Sub
-from extensions.ops.exp import ExpOp
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementOp
from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Graph, Node, rename_nodes
-from mo.ops.const import Const
class LogSoftmaxFrontReplacer(FrontReplacementOp):
reduce_max_node.out_port(0).connect(first_sub_node.in_port(1))
# Creating of Exp -> ReduceSum -> Log block
- exp_node = ExpOp(graph, {'name': node_name + '/Exp_'}).create_node()
+ exp_node = Exp(graph, {'name': node_name + '/Exp_'}).create_node()
reduce_sum_node = create_op_with_const_inputs(graph,
ReduceSum,
{1: int64_array([node.axis])},
op_attrs={'name': node_name + '/ReduceSum_', 'keep_dims': True})
- log_node = LogOp(graph, {'name': node_name + '/Log_'}).create_node()
+ log_node = Log(graph, {'name': node_name + '/Log_'}).create_node()
first_sub_node.out_port(0).connect(exp_node.in_port(0))
exp_node.out_port(0).connect(reduce_sum_node.in_port(0))
limitations under the License.
"""
+from extensions.ops.slice_like import SliceLike
from mo.front.extractor import FrontExtractorOp
from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
-from mo.ops.crop import Crop
class SliceLikeFrontExtractor(FrontExtractorOp):
@classmethod
def extract(cls, node):
attrs = get_mxnet_layer_attrs(node.symbol_dict)
- axes = attrs.tuple("axes", int, [])
- offset = [0 for i in range(0, axes[-1])]
+ axes = list(attrs.tuple("axes", int, []))
node_attrs = {
- 'axis': 1,
- 'offset': offset,
- 'dim': offset,
'axes': axes
}
# update the attributes of the node
- Crop.update_node_stat(node, node_attrs)
+ SliceLike.update_node_stat(node, node_attrs)
return cls.enabled
from extensions.front.mxnet.eltwise_scalar_replacers import MulScalarFrontReplacer
from extensions.front.mxnet.ssd_detection_output_replacer import SsdPatternDetectionOutputReplacer
from extensions.front.split_normalizer import AttributedSplitToSplit
+from extensions.ops.slice_like import SliceLike
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.graph.graph import Graph, Node
from mo.middle.pattern_match import find_pattern_matches
nodes=[
('power', dict(op='Mul')),
('anchor', dict(op='Const')),
- ('slice_like', dict(op='Crop')),
+ ('slice_like', dict(op='slice_like')),
('reshape1', dict(op='Reshape')),
('reshape2', dict(op='Reshape')),
('reshape3', dict(op='Reshape'))
variants = np.array([variants_dict['mul_scalar1x'], variants_dict['mul_scalar1y'],
variants_dict['mul_scalar2x'], variants_dict['mul_scalar2y']] * int(const.value.size / 4)).reshape(const.value.shape)
- priorbox_variants = Const(graph, dict(value=variants, symbol_dict={'name': const.id + '/priorbox_variants'})).create_node()
- variants_slice_like = Crop(graph, dict(axis=slice_like.axis, offset=slice_like.offset, dim=slice_like.dim, axes=slice_like.axes,
- symbol_dict={'name': slice_like.id + '/variants_slice_like'})) \
- .create_node()
+ priorbox_variants = Const(graph, dict(value=variants, name=const.id + '/priorbox_variants')).create_node()
+ variants_slice_like = SliceLike(graph, dict(axes=slice_like.axes,
+ name=slice_like.id + '/variants_slice_like')).create_node()
variants_slice_like.in_port(0).connect(priorbox_variants.out_port(0))
variants_slice_like.in_port(1).connect(crop_shape.out_port(0))
nodes=[
('power', dict(op='Mul')),
('anchor', dict(op='Const')),
- ('slice_like', dict(op='Crop')),
+ ('slice_like', dict(op='slice_like')),
('reshape1', dict(op='Reshape')),
('reshape2', dict(op='Reshape')),
('reshape3', dict(op='Reshape')),
@classmethod
def extract(cls, node):
- Broadcast.update_node_stat(node)
+ Broadcast.update_node_stat(node, {'mode': 'bidirectional'})
return cls.enabled
See the License for the specific language governing permissions and
limitations under the License.
"""
+import numpy as np
from extensions.ops.non_max_suppression import NonMaxSuppression
from mo.front.extractor import FrontExtractorOp
encoding_map = {0: 'corner', 1: 'center'}
center_point_box = onnx_attr(node, 'center_point_box', 'i', default=0)
NonMaxSuppression.update_node_stat(node, {'sort_result_descending': 0,
+ 'output_type': np.int64,
'box_encoding': encoding_map[center_point_box]})
return cls.enabled
node.soft_get('name', node.id), pads))
# Try to convert slightly incorrect models with insufficient pad parameters
- assert pads.size * 2 == kernel_shape.size
+ assert pads.size == kernel_shape.size
pads = np.concatenate([pads, pads])
log.warning('Extended pads to {}'.format(pads))
See the License for the specific language governing permissions and
limitations under the License.
"""
+import numpy as np
from mo.front.extractor import FrontExtractorOp
from mo.ops.shape import Shape
@classmethod
def extract(cls, node):
- Shape.update_node_stat(node)
+ Shape.update_node_stat(node, {'output_type': np.int64})
return cls.enabled
--- /dev/null
+"""
+ Copyright (C) 2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.common.replacement import FrontReplacementOp
+from mo.graph.graph import Graph, Node, rename_nodes
+from mo.ops.flatten import FlattenONNX
+from mo.ops.reshape import Reshape
+from mo.ops.shape import Shape
+from mo.ops.softmax import Softmax
+
+
+class SoftmaxONNXFrontReplacer(FrontReplacementOp):
+ """
+ Replace SoftmaxONNX operation with FlattenONNX -> Softmax -> Reshape subgraph
+ """
+ op = "SoftMaxONNX"
+ enabled = True
+
+ def run_before(self):
+ from extensions.front.onnx.flattenONNX_to_reshape import FlattenONNXToReshape
+ return [FlattenONNXToReshape]
+
+ def replace_op(self, graph: Graph, node: Node):
+ node_name = node.soft_get('name', node.id)
+ assert node.has_valid('axis'), 'The node "{}" does not have mandatory attribute "axis"'.format(node_name)
+
+ flatten_node = FlattenONNX(graph, {'name': node_name + '/FlattenONNX_', 'axis': node.axis}).create_node()
+ shape_node = Shape(graph, {'name': node_name + '/ShapeOf_'}).create_node()
+ softmax_node = Softmax(graph, {'name': node_name + '/Softmax_',
+ 'axis': 1,
+ 'framework_node_name': node_name,
+ 'rename_condition': lambda n: len(n.graph.get_op_nodes(name=node_name)) == 0
+ }).create_node()
+ reshape_node = Reshape(graph, {}).create_node()
+
+ rename_nodes([(node, node_name + '/delete'), (reshape_node, node_name)])
+
+ flatten_node.out_port(0).connect(softmax_node.in_port(0))
+ softmax_node.out_port(0).connect(reshape_node.in_port(0))
+ shape_node.out_port(0).connect(reshape_node.in_port(1))
+
+ source = node.in_port(0).get_source()
+
+ flatten_node.in_port(0).connect(source)
+ shape_node.in_port(0).connect(source)
+
+ return [reshape_node.id]
from mo.front.extractor import FrontExtractorOp
from mo.front.onnx.extractors.utils import onnx_attr
-from mo.ops.softmax import LogSoftmaxONNX, Softmax
+from mo.ops.softmax import LogSoftmaxONNX, SoftmaxONNX
class SoftmaxExtractor(FrontExtractorOp):
@classmethod
def extract(cls, node):
axis = onnx_attr(node, 'axis', 'i', default=1)
- Softmax.update_node_stat(node, {'axis': axis})
+ SoftmaxONNX.update_node_stat(node, {'axis': axis})
return cls.enabled
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementOp
from mo.front.tf.graph_utils import create_op_node_with_second_input
-from mo.graph.graph import Graph
+from mo.graph.graph import Graph, rename_nodes
from mo.ops.shape import Shape
from mo.ops.squeeze import Squeeze
node = match['op']
name = node.soft_get('name', node.id)
- shape_of = Shape(graph, {'name': name + '/shape_of'}).create_node()
- rank_1d = Shape(graph, {'name': name + '/rank_of'}).create_node()
- rank_0d = create_op_node_with_second_input(graph, Squeeze, int64_array([0]), {'name': name + '/0d_rank_of'},
- rank_1d)
+ assert node.has_valid('output_type'), \
+ 'Rank node should have `output_type` attribute, but it`s not for node {}'.format(name)
+
+ shape_of = Shape(graph, {'name': name + '/shape_of', 'output_type': node.output_type}).create_node()
+ rank_1d = Shape(graph, {'name': name + '/rank_of', 'output_type': node.output_type}).create_node()
+ rank_0d = create_op_node_with_second_input(
+ graph, Squeeze, int64_array(0), {'name': name + '/0d_rank_of'}, rank_1d)
shape_of.out_port(0).connect(rank_1d.in_port(0))
node.out_port(0).get_connection().set_source(rank_0d.out_port(0))
node.in_port(0).get_connection().set_destination(shape_of.in_port(0))
+
+ rename_nodes([(node, name + '/ToBeDeleted'), (rank_0d, name)])
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+from generator import generator, generate
+
+from extensions.front.rank_decomposer import RankDecomposer
+from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph, regular_op_with_empty_data, result, connect, \
+ valued_const_with_data
+
+nodes = lambda output_type: {
+ **regular_op_with_empty_data('input', {'type': 'Parameter'}),
+ **regular_op_with_empty_data('rank', {'op': 'Rank', 'type': None, 'output_type': output_type, 'name': 'my_rank'}),
+ **result(),
+
+ **regular_op_with_empty_data('shape', {'type': 'ShapeOf', 'output_type': output_type}),
+ **regular_op_with_empty_data('rank_1D', {'type': 'ShapeOf', 'output_type': output_type}),
+ **valued_const_with_data('zero', int64_array(0)),
+ **regular_op_with_empty_data('rank_0D', {'type': 'Squeeze'}),
+}
+
+
+@generator
+class RankDecomposerTest(unittest.TestCase):
+
+ @generate(np.int32, np.int64)
+ def test_rank_decomposer(self, output_type):
+ graph = build_graph(nodes_attrs=nodes(output_type), edges=[
+ *connect('input', 'rank'),
+ *connect('rank', 'output'),
+ ], nodes_with_edges_only=True)
+ RankDecomposer().find_and_replace_pattern(graph)
+
+ graph_ref = build_graph(nodes_attrs=nodes(output_type), edges=[
+ *connect('input', 'shape'),
+ *connect('shape', 'rank_1D'),
+ *connect('rank_1D', '0:rank_0D'),
+ *connect('zero', '1:rank_0D'),
+ *connect('rank_0D', 'output'),
+ ], nodes_with_edges_only=True)
+
+ (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
+ self.assertTrue(flag, resp)
+ self.assertEqual(graph.get_op_nodes(type='Squeeze')[0]['name'], 'my_rank',
+ 'Name is not inherited from original node for RankDecomposer')
+ print(output_type)
+
+ def test_rank_decomposer_assertion(self):
+ graph = build_graph(nodes_attrs=nodes(None), edges=[
+ *connect('input', 'rank'),
+ *connect('rank', 'output'),
+ ], nodes_with_edges_only=True)
+ self.assertRaises(AssertionError, RankDecomposer().find_and_replace_pattern, graph)
from extensions.ops.ReduceOps import ReduceProd
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Graph, Node
-from mo.ops.const import Const
+from mo.front.tf.graph_utils import create_op_node_with_second_input
+from mo.graph.graph import Graph, rename_nodes
from mo.ops.shape import Shape
op = "Size"
enabled = True
- def replace_op(self, graph: Graph, node: Node):
- shape = Shape(graph, {'name': node.name + '/Shape/'}).create_node()
- reduce_prod = ReduceProd(graph, {'name': shape.name + 'ReduceProd/', 'keep_dims': False}).create_node()
- reduce_axis = Const(graph, {'value': int64_array([0])}).create_node()
+ def replace_sub_graph(self, graph: Graph, match: dict):
+ node = match['op']
+ name = node.soft_get('name', node.id)
+ assert node.has_valid('output_type'), \
+ 'Size node should have `output_type` attribute, but it`s not for node {}'.format(name)
- # Connect nodes
+ shape = Shape(graph, {'name': name + '/Shape/', 'output_type': node.output_type}).create_node()
node.in_port(0).get_connection().set_destination(shape.in_port(0))
- reduce_prod.in_port(0).get_connection().set_source(shape.out_port(0))
- reduce_prod.in_port(1).get_connection().set_source(reduce_axis.out_port(0))
+ reduce_prod = create_op_node_with_second_input(
+ graph, ReduceProd, int64_array([0]), {'name': shape.name + 'ReduceProd/', 'keep_dims': False}, shape)
+ node.out_port(0).get_connection().set_source(reduce_prod.out_port(0))
- # The "explicit" version of the return value is: [(out_node.id, 0)])
- return [reduce_prod.id]
+ rename_nodes([(node, name + '/ToBeDeleted'), (reduce_prod, name)])
See the License for the specific language governing permissions and
limitations under the License.
"""
+import numpy as np
+
from extensions.ops.argmax import ArgMaxOp
from mo.front.extractor import FrontExtractorOp
+from mo.front.tf.extractors.utils import tf_dtype_extractor
class ArgMaxFrontExtractor(FrontExtractorOp):
@classmethod
def extract(cls, node):
ArgMaxOp.update_node_stat(node, {'out_max_val': 0, 'top_k': 1, 'axis': None,
- 'dim_attrs': ['axis'], 'keepdims': 0, 'remove_values_output': True})
+ 'dim_attrs': ['axis'], 'keepdims': 0, 'remove_values_output': True,
+ 'output_type': tf_dtype_extractor(node.pb.attr['out_type'].type, np.int64),
+ })
return cls.enabled
"""
import logging as log
+import numpy as np
+
from extensions.ops.non_max_suppression import NonMaxSuppression
from mo.front.extractor import FrontExtractorOp
@classmethod
def extract(cls, node):
- attrs = {'sort_result_descending': 1, 'center_point_box': 0}
+ attrs = {'sort_result_descending': 1, 'center_point_box': 0, 'output_type': np.int32}
NonMaxSuppression.update_node_stat(node, attrs)
return cls.enabled
if not pad_to_max_output_size:
log.warning('The attribute "pad_to_max_output_size" of node {} is equal to False which is not supported.'
'Forcing it to be equal to True'.format(node.soft_get('name')))
- attrs = {'sort_result_descending': 1, 'box_encoding': 'corner'}
+ attrs = {'sort_result_descending': 1, 'box_encoding': 'corner', 'output_type': np.int32}
NonMaxSuppression.update_node_stat(node, attrs)
return cls.enabled
if not pad_to_max_output_size:
log.warning('The attribute "pad_to_max_output_size" of node {} is equal to False which is not supported.'
'Forcing it to be equal to True'.format(node.soft_get('name')))
- attrs = {'sort_result_descending': 1, 'box_encoding': 'corner'}
+ attrs = {'sort_result_descending': 1, 'box_encoding': 'corner', 'output_type': np.int32}
NonMaxSuppression.update_node_stat(node, attrs)
return cls.enabled
@classmethod
def extract(cls, node: Node):
- Rank.update_node_stat(node)
+ Rank.update_node_stat(node, {'output_type': np.int32})
return cls.enabled
@classmethod
def extract(cls, node: Node):
- Shape.update_node_stat(node, {'data_type': tf_dtype_extractor(node.pb.attr['out_type'].type, np.int32)})
+ Shape.update_node_stat(node, {'output_type': tf_dtype_extractor(node.pb.attr['out_type'].type, np.int32)})
return cls.enabled
@classmethod
def extract(cls, node):
- Size.update_node_stat(node)
+ Size.update_node_stat(node, {'output_type': tf_dtype_extractor(node.pb.attr['out_type'].type, np.int32)})
return cls.enabled
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+from generator import generator, generate
+
+from extensions.front.tf.SizeReplacer import SizeFrontReplacer
+from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph, regular_op_with_empty_data, result, connect, \
+ valued_const_with_data
+
+nodes = lambda output_type: {
+ **regular_op_with_empty_data('input', {'type': 'Parameter'}),
+ **regular_op_with_empty_data('size', {'op': 'Size', 'type': None, 'output_type': output_type, 'name': 'my_size'}),
+ **result(),
+
+ **regular_op_with_empty_data('shape', {'type': 'ShapeOf', 'output_type': output_type}),
+ **valued_const_with_data('zero', int64_array([0])),
+ **regular_op_with_empty_data('reduce', {'type': 'ReduceProd', 'keep_dims': False}),
+}
+
+
+@generator
+class SizeReplacerTest(unittest.TestCase):
+
+ @generate(np.int32, np.int64)
+ def test_size_replacer(self, output_type):
+ graph = build_graph(nodes_attrs=nodes(output_type), edges=[
+ *connect('input', 'size'),
+ *connect('size', 'output'),
+ ], nodes_with_edges_only=True)
+ SizeFrontReplacer().find_and_replace_pattern(graph)
+
+ graph_ref = build_graph(nodes_attrs=nodes(output_type), edges=[
+ *connect('input', 'shape'),
+ *connect('shape', '0:reduce'),
+ *connect('zero', '1:reduce'),
+ *connect('reduce', 'output'),
+ ], nodes_with_edges_only=True)
+
+ (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
+ self.assertTrue(flag, resp)
+ self.assertEqual(graph.get_op_nodes(type='ReduceProd')[0]['name'], 'my_size',
+ 'Name is not inherited from original node for SizeReplacer')
+ print(output_type)
+
+ def test_size_replacer_assertion(self):
+ graph = build_graph(nodes_attrs=nodes(None), edges=[
+ *connect('input', 'size'),
+ *connect('size', 'output'),
+ ], nodes_with_edges_only=True)
+ self.assertRaises(AssertionError, SizeFrontReplacer().find_and_replace_pattern, graph)
See the License for the specific language governing permissions and
limitations under the License.
"""
+import numpy as np
from extensions.ops.topk import TopK
from mo.front.extractor import FrontExtractorOp
@classmethod
def extract(cls, node):
sort = 'value' if node.pb.attr['sorted'] else 'none'
- TopK.update_node_stat(node, {'mode': 'max', 'axis': -1, 'sort': sort, 'k' : node.pb.attr['k'].i})
+ TopK.update_node_stat(node, {'mode': 'max', 'axis': -1, 'sort': sort, 'k': node.pb.attr['k'].i,
+ 'index_element_type': np.int32})
return cls.enabled
@classmethod
def extract(cls, node):
sort = 'value' if node.pb.attr['sorted'] else 'none'
- TopK.update_node_stat(node, {'mode': 'max', 'axis': -1, 'sort': sort})
+ TopK.update_node_stat(node, {'mode': 'max', 'axis': -1, 'sort': sort, 'index_element_type': np.int32})
return cls.enabled
import numpy as np
+from extensions.middle.SliceLikeToStridedSlice import SliceLikeToStridedSlice
from mo.graph.graph import Graph
from mo.middle.replacement import MiddleReplacementPattern
from mo.ops.const import Const
from extensions.middle.pass_separator import MiddleStart
return [MiddleStart]
+ def run_before(self):
+ return [SliceLikeToStridedSlice]
+
def pattern(self):
return dict(
nodes=[
('const', dict(op='Const')),
('const_data', dict(kind='data')),
- ('slice_like', dict(op='Crop')),
+ ('slice_like', dict(op='slice_like')),
('slice_like_out', dict(kind='data')),
('reshape', dict(op='Reshape')),
],
def replace_pattern(self, graph: Graph, match: dict):
node = match['argmax']
+ node_name = node.soft_get('name', node.id)
connected_ports = [port for port in node.in_ports().values() if not port.disconnected()]
if len(connected_ports) == 2:
else:
axis = node.axis
- assert axis is not None, 'The "axis" should be defined for node "{}"'.format(node.soft_get('name'))
+ assert axis is not None, 'The "axis" should be defined for node "{}"'.format(node_name)
+ assert node.has_and_set('output_type'), 'The data type is not set for node "{}"'.format(node_name)
topk_node = TopK(graph, {'axis': axis, 'mode': 'max', 'sort': 'index',
- 'remove_values_output': node.has_and_set('remove_values_output')}).create_node()
+ 'remove_values_output': node.has_and_set('remove_values_output'),
+ 'index_element_type': node.output_type}).create_node()
node.in_port(0).get_connection().set_destination(topk_node.in_port(0))
if node.has_and_set('out_max_val'): # in this mode the ArgMax produces tuples (max_ind, max_value)
concat_node = Concat(graph, {'axis': 1, 'name': node.name + '/Concat'}).create_node()
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+from typing import Dict
+
+from mo.front.tf.graph_utils import create_op_with_const_inputs
+from mo.graph.graph import Graph, Node, rename_nodes
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.shape import Shape
+from mo.ops.strided_slice import StridedSlice
+from mo.utils.shape import get_shape_values_by_range_idxs, new_shape_node_from_shape_nodes, \
+ get_shape_and_rank_nodes_by_port
+
+
+class SliceLikeToStridedSlice(MiddleReplacementPattern):
+ """
+ Replace mxnet slice_like operation with StridedSlice in reshapable way.
+ The begin parameter for StridedSlice is always a zero vector.
+ The end parameter depends on the slice_like inputs and axes.
+
+ 1. If slice_like inputs has the same ranks, we can use second input shape (shape_like) as the end parameter for
+ StridedSlice. Axes parameter will form end_mask, that allows to use slice only on the desired axes.
+ Example:
+ input_shape = [1, 64, 128, 256], shape_like = [1, 2, 3, 4], axes = [2, 3].
+ In that case end = shape_like = [1, 2, 3, 4], but end_mask = [0, 0, 1, 1], so output_shape = [1, 64, 3, 4]
+
+ 2. Axes parameter has the last dimension of the first input shape (in that case shape_like >= input_shape).
+ Here we can use only a part of shape_like as the end parameter.
+ Example:
+ input_shape = [1, 64, 128, 256], shape_like = [1, 2, 3, 4, 5], axes = [2, 3].
+ end = shape_like[:4] = [1, 2, 3, 4], end_mask = [0, 0, 1, 1], output_shape = [1, 64, 3, 4]
+
+ 3. Usual case, where we form end parameter by concatenate parts of shape_like and input_shape.
+ Examples:
+ input_shape = [1, 64, 128, 256, 512], shape_like = [1, 2, 3, 4], axes = [2, 3].
+ end = shape_like[:4] + input_shape[4:] = [1, 2, 3, 4, 512],
+ end_mask = [0, 0, 1, 1, 0], output_shape = [1, 64, 3, 4, 512]
+
+ input_shape = [1, 64, 128, 256], shape_like = [1, 2, 3, 4, 5], axes = [0, 2].
+ end = shape_like[:3] + input_shape[3:] = [1, 2, 3, 256],
+ end_mask = [1, 0, 1, 0], output_shape = [1, 64, 3, 256]
+ """
+
+ enabled = True
+ graph_condition = [lambda graph: graph.graph['fw'] == 'mxnet']
+
+ @staticmethod
+ def pattern():
+ return dict(
+ nodes=[
+ ('op', dict(kind='op', op='slice_like'))
+ ],
+ edges=[]
+ )
+
+ @staticmethod
+ def replace_pattern(graph: Graph, match: Dict[str, Node]):
+ node = match['op']
+ name = node.soft_get('name', node.id)
+ input_shape = node.in_port(0).data.get_shape()
+ second_input_shape = node.in_port(1).data.get_shape()
+
+ begin_mask = np.zeros(len(input_shape), dtype=np.int64)
+ end_mask = np.zeros(len(input_shape), dtype=np.int64)
+
+ for i in node.axes:
+ end_mask[i] = np.int64(1)
+
+ new_axis_mask = np.zeros(len(input_shape), dtype=np.int64)
+ shrink_axis_mask = np.zeros(len(input_shape), dtype=np.int64)
+ ellipsis_mask = np.zeros(len(input_shape), dtype=np.int64)
+
+ ss = create_op_with_const_inputs(graph, StridedSlice,
+ port_value_dict={1: np.zeros(len(input_shape), dtype=np.int64)},
+ op_attrs={'name': 'StridedSlice', 'begin_mask': begin_mask,
+ 'end_mask': end_mask, 'new_axis_mask': new_axis_mask,
+ 'shrink_axis_mask': shrink_axis_mask,
+ 'ellipsis_mask': ellipsis_mask})
+ if input_shape.size == second_input_shape.size:
+ end = Shape(graph, dict(name=name + '/End')).create_node()
+ end.in_port(0).connect(node.in_port(1).get_source())
+ ss.in_port(2).connect(end.out_port(0))
+ else:
+ shape_like, rank_like = get_shape_and_rank_nodes_by_port(node.in_port(1).get_source())
+ end_first_part = get_shape_values_by_range_idxs(shape_like, rank_like, 0, node.axes[-1], include_end=True)
+ if input_shape.size - 1 == node.axes[-1]:
+ ss.in_port(2).connect(end_first_part.out_port(0))
+ else:
+ shape, rank = get_shape_and_rank_nodes_by_port(node.in_port(0).get_source())
+ end_second_part = get_shape_values_by_range_idxs(shape, rank, node.axes[-1], -1, include_begin=False,
+ include_end=True)
+ end = new_shape_node_from_shape_nodes([end_first_part, end_second_part])
+ ss.in_port(2).connect(end.out_port(0))
+
+ node.in_port(0).get_connection().set_destination(ss.in_port(0))
+ node.in_port(1).disconnect()
+ node.out_port(0).get_connection().set_source(ss.out_port(0))
+
+ rename_nodes([(node, name + '/ShouldBeDeleted'), (ss, name)])
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+
+from extensions.middle.SliceLikeToStridedSlice import SliceLikeToStridedSlice
+from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
+
+nodes_attributes = {
+ 'input': {'kind': 'op', 'op': 'Const'},
+ 'input_data': {'kind': 'data'},
+
+ 'shape_like_input': {'kind': 'op', 'op': 'Const'},
+ 'shape_like_input_data': {'kind': 'data'},
+
+ 'slice_like': {'kind': 'op', 'op': 'slice_like'},
+ 'slice_like_data': {'kind': 'data', 'shape': None, 'value': None},
+
+ 'result': {'kind': 'op', 'op': 'Result'},
+
+ 'shape': {'kind': 'op', 'op': 'ShapeOf'},
+ 'shape_data': {'kind': 'data'},
+ 'rank_1_d': {'kind': 'op', 'op': 'ShapeOf'},
+ 'rank_1_d_data': {'kind': 'data'},
+ 'rank': {'kind': 'op', 'op': 'Squeeze'},
+ 'rank_data': {'kind': 'data'},
+ 'rank_const': {'kind': 'op', 'op': 'Const'},
+ 'rank_const_data': {'kind': 'data'},
+
+ 'shape_like': {'kind': 'op', 'op': 'ShapeOf'},
+ 'shape_like_data': {'kind': 'data'},
+ 'rank_like_1_d': {'kind': 'op', 'op': 'ShapeOf'},
+ 'rank_like_1_d_data': {'kind': 'data'},
+ 'rank_like': {'kind': 'op', 'op': 'Squeeze'},
+ 'rank_like_const': {'kind': 'op', 'op': 'Const'},
+ 'rank_like_const_data': {'kind': 'data'},
+
+ 'begin': {'kind': 'op', 'op': 'Const'},
+ 'begin_data': {'kind': 'data'},
+ 'ss': {'kind': 'op', 'op': 'StridedSlice'},
+
+ 'start_idx_like': {'kind': 'op', 'op': 'Const'},
+ 'start_idx_like_data': {'kind': 'data'},
+ 'end_idx_like': {'kind': 'op', 'op': 'Const'},
+ 'end_idx_like_data': {'kind': 'data'},
+ 'end_idx_like_const': {'kind': 'op', 'op': 'Const'},
+ 'end_idx_like_const_data': {'kind': 'data'},
+ 'end_idx_like_add': {'kind': 'op', 'op': 'Add'},
+ 'end_idx_like_add_data': {'kind': 'data'},
+ 'delta_like': {'kind': 'op', 'op': 'Const'},
+ 'delta_like_data': {'kind': 'data'},
+ 'range_like': {'kind': 'op', 'op': 'Range'},
+ 'range_like_data': {'kind': 'data'},
+ 'gather_like': {'kind': 'op', 'op': 't_gather'},
+ 'gather_like_data': {'kind': 'data'},
+ 'gather_like_axis': {'kind': 'op', 'op': 'Const'},
+ 'gather_like_axis_data': {'kind': 'data'},
+ 'concat': {'kind': 'op', 'op': 'Concat'},
+ 'concat_data': {'kind': 'data'},
+
+ 'start_idx': {'kind': 'op', 'op': 'Const'},
+ 'start_idx_data': {'kind': 'data'},
+ 'start_idx_const': {'kind': 'op', 'op': 'Const'},
+ 'start_idx_const_data': {'kind': 'data'},
+ 'start_idx_add': {'kind': 'op', 'op': 'Add'},
+ 'start_idx_add_data': {'kind': 'data'},
+ 'end_idx': {'kind': 'op', 'op': 'Add'},
+ 'end_idx_data': {'kind': 'data'},
+ 'end_idx_axis': {'kind': 'op', 'op': 'Const'},
+ 'end_idx_axis_data': {'kind': 'data'},
+ 'end_idx_const': {'kind': 'op', 'op': 'Const'},
+ 'end_idx_const_data': {'kind': 'data'},
+ 'end_idx_add': {'kind': 'op', 'op': 'Add'},
+ 'end_idx_add_data': {'kind': 'data'},
+ 'delta': {'kind': 'op', 'op': 'Const'},
+ 'delta_data': {'kind': 'data'},
+ 'range': {'kind': 'op', 'op': 'Range'},
+ 'range_data': {'kind': 'data'},
+ 't_gather': {'kind': 'op', 'op': 't_gather'},
+ 'gather_data': {'kind': 'data'},
+ 'gather_axis': {'kind': 'op', 'op': 'Const'},
+ 'gather_axis_data': {'kind': 'data'}
+
+}
+
+edges = [
+ ('input', 'input_data'),
+ ('input_data', 'slice_like', {'in': 0}),
+ ('shape_like_input', 'shape_like_input_data'),
+ ('shape_like_input_data', 'slice_like', {'in': 1}),
+ ('slice_like', 'slice_like_data'),
+ ('slice_like_data', 'result')
+]
+
+same_input_shapes_dims_edges = [
+ ('input', 'input_data'),
+ ('input_data', 'ss', {'in': 0}),
+ ('ss', 'slice_like_data'),
+ ('slice_like_data', 'result'),
+ ('shape_like_input', 'shape_like_input_data'),
+ ('shape_like_input_data', 'shape_like'),
+ ('shape_like', 'shape_like_data'),
+ ('shape_like_data', 'ss', {'in': 2}),
+ ('begin', 'begin_data'),
+ ('begin_data', 'ss', {'in': 1})
+]
+
+shape_like_sub_graph_edges = [
+ ('input', 'input_data'),
+ ('input_data', 'ss', {'in': 0}),
+ ('ss', 'slice_like_data'),
+ ('slice_like_data', 'result'),
+ ('begin', 'begin_data'),
+ ('begin_data', 'ss', {'in': 1}),
+ ('shape_like_input', 'shape_like_input_data'),
+ ('shape_like_input_data', 'shape_like'),
+ ('shape_like', 'shape_like_data'),
+ ('shape_like_data', 'rank_like_1_d'),
+ ('rank_like_1_d', 'rank_like_1_d_data'),
+ ('rank_like_1_d_data', 'rank_like', {'in': 0}),
+ ('rank_like_const', 'rank_like_const_data'),
+ ('rank_like_const_data', 'rank_like', {'in': 1}),
+ ('end_idx_like', 'end_idx_like_data'),
+ ('end_idx_like_const', 'end_idx_like_const_data'),
+ ('end_idx_like_data', 'end_idx_like_add', {'in': 0}),
+ ('end_idx_like_const_data', 'end_idx_like_add', {'in': 1}),
+ ('end_idx_like_add', 'end_idx_like_add_data'),
+ ('end_idx_like_add_data', 'range_like', {'in': 1}),
+ ('start_idx_like', 'start_idx_like_data'),
+ ('start_idx_like_data', 'range_like', {'in': 0}),
+ ('delta_like', 'delta_like_data'),
+ ('delta_like_data', 'range_like', {'in': 2}),
+ ('range_like', 'range_like_data'),
+ ('range_like_data', 'gather_like', {'in': 1}),
+ ('shape_like_data', 'gather_like', {'in': 0}),
+ ('gather_like_axis', 'gather_like_axis_data'),
+ ('gather_like_axis_data', 'gather_like', {'in': 2}),
+ ('gather_like', 'gather_like_data')
+]
+
+last_axis_index = shape_like_sub_graph_edges + [('gather_like_data', 'ss', {'in': 2})]
+
+input_sub_graph_edges = [
+ ('input_data', 'shape'),
+ ('shape', 'shape_data'),
+ ('shape_data', 'rank_1_d'),
+ ('rank_1_d', 'rank_1_d_data'),
+ ('rank_1_d_data', 'rank', {'in': 0}),
+ ('rank_const', 'rank_const_data'),
+ ('rank_const_data', 'rank', {'in': 1}),
+ ('rank', 'rank_data'),
+ ('rank_data', 'end_idx', {'in': 0}),
+ ('end_idx_axis', 'end_idx_axis_data'),
+ ('end_idx_axis_data', 'end_idx', {'in': 1}),
+ ('end_idx', 'end_idx_data'),
+ ('end_idx_data', 'end_idx_add', {'in': 0}),
+ ('end_idx_const', 'end_idx_const_data'),
+ ('end_idx_const_data', 'end_idx_add', {'in': 1}),
+ ('start_idx', 'start_idx_data'),
+ ('start_idx_data', 'start_idx_add', {'in': 0}),
+ ('start_idx_const', 'start_idx_const_data'),
+ ('start_idx_const_data', 'start_idx_add', {'in': 1}),
+ ('end_idx_add', 'end_idx_add_data'),
+ ('start_idx_add', 'start_idx_add_data'),
+ ('delta', 'delta_data'),
+ ('start_idx_add_data', 'range', {'in': 0}),
+ ('end_idx_add_data', 'range', {'in': 1}),
+ ('delta_data', 'range', {'in': 2}),
+ ('range', 'range_data'),
+ ('range_data', 't_gather', {'in': 1}),
+ ('shape_data', 't_gather', {'in': 0}),
+ ('gather_axis', 'gather_axis_data'),
+ ('gather_axis_data', 't_gather', {'in': 2}),
+ ('t_gather', 'gather_data'),
+ ('gather_data', 'concat', {'in': 1}),
+ ('concat', 'concat_data'),
+ ('concat_data', 'ss', {'in': 2}),
+ ('gather_like_data', 'concat', {'in': 0})
+]
+
+input_part_shape_edges = shape_like_sub_graph_edges + input_sub_graph_edges
+
+
+class SliceLikeToStridedSliceTest(unittest.TestCase):
+
+ def test_1(self):
+ graph = build_graph(
+ nodes_attributes,
+ edges,
+ update_attributes={
+ 'input_data': {'shape': int64_array([1, 224, 224, 3])},
+ 'shape_like_input_data': {'shape': int64_array([2, 2, 2, 2])},
+ 'slice_like': {'axes': int64_array([2, 3])}
+ },
+ nodes_with_edges_only=True
+ )
+ SliceLikeToStridedSlice().find_and_replace_pattern(graph)
+ ref_graph = build_graph(
+ nodes_attributes,
+ same_input_shapes_dims_edges,
+ nodes_with_edges_only=True
+ )
+
+ flag, resp = compare_graphs(graph, ref_graph, 'result')
+ self.assertTrue(flag, resp)
+
+ def test_2(self):
+ graph = build_graph(
+ nodes_attributes,
+ edges,
+ update_attributes={
+ 'input_data': {'shape': int64_array([1, 224, 224, 3])},
+ 'shape_like_input_data': {'shape': int64_array([2, 2, 2, 2, 2])},
+ 'slice_like': {'axes': int64_array([2, 3])}
+ },
+ nodes_with_edges_only=True
+ )
+ SliceLikeToStridedSlice().find_and_replace_pattern(graph)
+ ref_graph = build_graph(
+ nodes_attributes,
+ last_axis_index,
+ nodes_with_edges_only=True
+ )
+
+ flag, resp = compare_graphs(graph, ref_graph, 'result')
+ self.assertTrue(flag, resp)
+
+ def test_3(self):
+ graph = build_graph(
+ nodes_attributes,
+ edges,
+ update_attributes={
+ 'input_data': {'shape': int64_array([1, 224, 224, 3])},
+ 'shape_like_input_data': {'shape': int64_array([2, 2, 2, 2, 2])},
+ 'slice_like': {'axes': int64_array([1, 2])}
+ },
+ nodes_with_edges_only=True
+ )
+ SliceLikeToStridedSlice().find_and_replace_pattern(graph)
+ ref_graph = build_graph(
+ nodes_attributes,
+ input_part_shape_edges,
+ nodes_with_edges_only=True
+ )
+ flag, resp = compare_graphs(graph, ref_graph, 'result')
+ self.assertTrue(flag, resp)
+++ /dev/null
-"""
- Copyright (C) 2018-2020 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import logging as log
-
-import numpy as np
-
-from extensions.middle.ConvertGroupedStridedSlice import ConvertGroupedStridedSlice
-from extensions.middle.SliceConverter import ConvertSlice
-from mo.front.common.partial_infer.utils import int64_array
-from mo.graph.graph import Graph
-from mo.middle.passes.eliminate import remove_op_node_with_data_node
-from mo.middle.replacement import MiddleReplacementPattern
-
-
-class UselessStridedSliceEraser(MiddleReplacementPattern):
- enabled = True
- force_shape_inference = True
-
- def run_before(self):
- return [ConvertGroupedStridedSlice]
-
- def run_after(self):
- return [ConvertSlice]
-
- @staticmethod
- def pattern():
- return dict(
- nodes=[('strided_slice', dict(kind='op', op='StridedSlice'))],
- edges=[]
- )
-
- @staticmethod
- def replace_pattern(graph: Graph, match: dict):
- node_ss = match['strided_slice']
- # slices = [elem for elem in node_ss.slices if elem is not None]
- # node_ss.slices = np.array(slices)
-
- if node_ss.out_port(0).data.get_value() is not None:
- # StridedSlices(SS) in shape-calculating sub-graphs that should not be deleted that easily
- # Example:
- # In RetinaNetFilteredDetectionsReplacement we have SS that slices first batch
- # We delete such SS for batch 1, but it should be performed while reshaping the model
- return
-
- output_data_node = node_ss.out_node(0)
- input_data_node = node_ss.in_node(0)
-
- out_shape = output_data_node.shape
-
- if not np.all(node_ss.shrink_axis_mask == 0):
- out_shape = list(out_shape)
- for i in range(len(node_ss.shrink_axis_mask)):
- if node_ss.shrink_axis_mask[i] == 1:
- out_shape.insert(i, 1)
- out_shape = int64_array(out_shape)
-
- if not np.all(node_ss.new_axis_mask == 0):
- out_shape = list(out_shape)
- for i in reversed(range(len(node_ss.new_axis_mask))):
- if node_ss.new_axis_mask[i] == 1:
- out_shape.pop(i)
- out_shape = int64_array(out_shape)
-
- if np.array_equal(input_data_node.shape, out_shape) and \
- all(elem.step == 1 for elem in match['strided_slice'].slices):
- if not np.all(node_ss.shrink_axis_mask == 0):
- ConvertGroupedStridedSlice.add_squeeze_for_shrink(graph, node_ss)
- if not np.all(node_ss.new_axis_mask == 0):
- ConvertGroupedStridedSlice.add_unsqueeze_for_new(graph, node_ss)
-
- log.info("Useless StridedSlice op '{}' has been detected".format(match['strided_slice'].id))
- # remove inputs to Strided Slice so it has just one input with data so we can use 'remove_op_node' function
- graph.remove_edge(match['strided_slice'].in_node(1).id, match['strided_slice'].id)
- graph.remove_edge(match['strided_slice'].in_node(2).id, match['strided_slice'].id)
- if len(match['strided_slice'].in_nodes()) > 3:
- graph.remove_edge(match['strided_slice'].in_node(3).id, match['strided_slice'].id)
-
- remove_op_node_with_data_node(graph, match['strided_slice'])
+++ /dev/null
-"""
- Copyright (C) 2018-2020 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-import unittest
-
-from extensions.middle.UselessStridedSlice import UselessStridedSliceEraser
-from mo.front.common.partial_infer.utils import int64_array
-from mo.middle.passes.eliminate import shape_inference
-from mo.utils.ir_engine.compare_graphs import compare_graphs
-from mo.utils.unittest.graph import build_graph
-
-nodes_attributes = {
- # input data
- 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
- 'placeholder_data': {'value': None, 'shape': int64_array([4, 1, 6]), 'kind': 'data', 'data_type': None},
- #
- 'strided_slice': {'type': 'StridedSlice', 'kind': 'op', 'op': 'StridedSlice',
- 'shrink_axis_mask': int64_array([0, 0, 0]), 'new_axis_mask': int64_array([0, 0, 0]),
- 'slices': [slice(0, 4, 1), slice(0, 1, 1), slice(0, 6, 1)]},
- 'strided_slice_data': {'value': None, 'shape': int64_array([4, 1, 6]), 'kind': 'data'},
- 'strided_slice_input_1_data': {'value': None, 'shape': int64_array([3]), 'kind': 'data'},
- 'strided_slice_input_2_data': {'value': None, 'shape': int64_array([3]), 'kind': 'data'},
- 'strided_slice_input_3_data': {'value': None, 'shape': int64_array([3]), 'kind': 'data'},
- #
- 'strided_slice_2': {'type': 'StridedSlice', 'kind': 'op', 'op': 'StridedSlice',
- 'shrink_axis_mask': int64_array([0, 0, 0]), 'new_axis_mask': int64_array([0, 0, 0]),
- 'slices': [slice(0, 4, 1), slice(0, 1, 1), slice(0, 6, 1)]},
- 'strided_slice_2_data': {'value': None, 'shape': int64_array([4, 1, 6]), 'kind': 'data'},
- # Output operation
- 'output_op': {'kind': 'op', 'op': 'Result'},
- # squeeze op
- 'squeeze': {'type': 'Squeeze', 'kind': 'op', 'op': 'Squeeze'},
- 'squeeze_const': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': int64_array([1])},
- 'squeeze_const_data': {'kind': 'data'},
- # unsqueeze op
- 'unsqueeze': {'type': None, 'kind': 'op', 'op': 'Unsqueeze'},
- 'unsqueeze_const': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': int64_array([1])},
- 'unsqueeze_const_data': {'kind': 'data'},
- 'unsqueeze_data': {'value': None, 'shape': int64_array([4, 6]), 'kind': 'data'},
-}
-
-
-class UselessStridedSliceTests(unittest.TestCase):
- def test_single_stride_slice_removal(self):
- graph = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'strided_slice'),
- ('strided_slice_input_1_data', 'strided_slice'),
- ('strided_slice_input_2_data', 'strided_slice'),
- ('strided_slice_input_3_data', 'strided_slice'),
- ('strided_slice', 'strided_slice_data'),
- ('strided_slice_data', 'output_op'),
- ],
- {},
- nodes_with_edges_only=True
- )
-
- UselessStridedSliceEraser().find_and_replace_pattern(graph)
- shape_inference(graph)
-
- graph_ref = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'output_op'),
- ],
- {'placeholder_data': {'shape': int64_array([4, 1, 6])}},
- nodes_with_edges_only=True
- )
- (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
- self.assertTrue(flag, resp)
-
- def test_single_stride_slice_with_shrink_removal(self):
- graph = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'strided_slice'),
- ('strided_slice_input_1_data', 'strided_slice'),
- ('strided_slice_input_2_data', 'strided_slice'),
- ('strided_slice_input_3_data', 'strided_slice'),
- ('strided_slice', 'strided_slice_data'),
- ('strided_slice_data', 'output_op'),
- ],
- {'strided_slice': {'shrink_axis_mask': int64_array([0, 1, 0])},
- 'strided_slice_data': {'shape': int64_array([4, 6])}},
- nodes_with_edges_only=True
- )
- graph.graph['layout'] = 'NCHW'
-
- UselessStridedSliceEraser().find_and_replace_pattern(graph)
- shape_inference(graph)
-
- graph_ref = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'squeeze'),
- ('squeeze_const', 'squeeze_const_data'),
- ('squeeze_const_data', 'squeeze'),
- ('squeeze', 'strided_slice_data'),
- ('strided_slice_data', 'output_op')
- ],
- {'placeholder_data': {'shape': int64_array([4, 1, 6])},
- 'strided_slice_data': {'shape': int64_array([4, 6])}},
- nodes_with_edges_only=True
- )
- (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
- self.assertTrue(flag, resp)
-
- def test_single_stride_slice_with_new_removal(self):
- graph = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'strided_slice'),
- ('strided_slice_input_1_data', 'strided_slice'),
- ('strided_slice_input_2_data', 'strided_slice'),
- ('strided_slice_input_3_data', 'strided_slice'),
- ('strided_slice', 'strided_slice_data'),
- ('strided_slice_data', 'output_op'),
- ],
- {'strided_slice': {'new_axis_mask': int64_array([0, 1, 0, 0])},
- 'strided_slice_data': {'shape': int64_array([4, 1, 1, 6])}},
- nodes_with_edges_only=True
- )
- graph.graph['layout'] = 'NCHW'
-
- UselessStridedSliceEraser().find_and_replace_pattern(graph)
- shape_inference(graph)
-
- graph_ref = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'unsqueeze'),
- ('unsqueeze_const', 'unsqueeze_const_data'),
- ('unsqueeze_const_data', 'unsqueeze'),
- ('unsqueeze', 'strided_slice_data'),
- ('strided_slice_data', 'output_op')
- ],
- {'placeholder_data': {'shape': int64_array([4, 1, 6])},
- 'strided_slice_data': {'shape': int64_array([4, 1, 1, 6])}},
- nodes_with_edges_only=True
- )
- (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
- self.assertTrue(flag, resp)
-
- def test_single_stride_slice_with_shrink_and_new_removal(self):
- graph = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'strided_slice'),
- ('strided_slice_input_1_data', 'strided_slice'),
- ('strided_slice_input_2_data', 'strided_slice'),
- ('strided_slice_input_3_data', 'strided_slice'),
- ('strided_slice', 'strided_slice_data'),
- ('strided_slice_data', 'output_op'),
- ],
- {'strided_slice': {'shrink_axis_mask': int64_array([0, 1, 0, 0]),
- 'new_axis_mask': int64_array([0, 0, 1, 0])},
- 'strided_slice_data': {'shape': int64_array([4, 1, 6])}},
- nodes_with_edges_only=True
- )
- graph.graph['layout'] = 'NCHW'
-
- UselessStridedSliceEraser().find_and_replace_pattern(graph)
- shape_inference(graph)
-
- graph_ref = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'unsqueeze'),
- ('unsqueeze_const', 'unsqueeze_const_data'),
- ('unsqueeze_const_data', 'unsqueeze'),
- ('unsqueeze', 'unsqueeze_data'),
- ('unsqueeze_data', 'squeeze'),
- ('squeeze_const', 'squeeze_const_data'),
- ('squeeze_const_data', 'squeeze'),
- ('squeeze', 'strided_slice_data'),
- ('strided_slice_data', 'output_op')
- ],
- {'placeholder_data': {'shape': int64_array([4, 1, 6])},
- 'unsqueeze_data': {'shape': int64_array([4, 1, 1, 6])},
- 'strided_slice_data': {'shape': int64_array([4, 1, 6])},
- 'unsqueeze_const': {'value': int64_array([2])},
- },
- nodes_with_edges_only=True
- )
- (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
- self.assertTrue(flag, resp)
-
- def test_single_stride_slice_with_new_and_shrink_removal(self):
- graph = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'strided_slice'),
- ('strided_slice_input_1_data', 'strided_slice'),
- ('strided_slice_input_2_data', 'strided_slice'),
- ('strided_slice_input_3_data', 'strided_slice'),
- ('strided_slice', 'strided_slice_data'),
- ('strided_slice_data', 'output_op'),
- ],
- {'strided_slice': {'shrink_axis_mask': int64_array([0, 0, 1, 0]),
- 'new_axis_mask': int64_array([0, 1, 0, 0])},
- 'strided_slice_data': {'shape': int64_array([4, 1, 6])}},
- nodes_with_edges_only=True
- )
- graph.graph['layout'] = 'NCHW'
-
- UselessStridedSliceEraser().find_and_replace_pattern(graph)
- shape_inference(graph)
-
- graph_ref = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'unsqueeze'),
- ('unsqueeze_const', 'unsqueeze_const_data'),
- ('unsqueeze_const_data', 'unsqueeze'),
- ('unsqueeze', 'unsqueeze_data'),
- ('unsqueeze_data', 'squeeze'),
- ('squeeze_const', 'squeeze_const_data'),
- ('squeeze_const_data', 'squeeze'),
- ('squeeze', 'strided_slice_data'),
- ('strided_slice_data', 'output_op')
- ],
- {'unsqueeze_data': {'shape': int64_array([4, 1, 1, 6])},
- 'strided_slice_data': {'shape': int64_array([4, 1, 6])},
- 'squeeze_const': {'value': int64_array([2])},
- },
- nodes_with_edges_only=True
- )
-
- (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
- self.assertTrue(flag, resp)
-
- def test_consecutive_stride_slices_removal(self):
- graph = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'strided_slice'),
- ('strided_slice_input_1_data', 'strided_slice'),
- ('strided_slice_input_2_data', 'strided_slice'),
- ('strided_slice_input_3_data', 'strided_slice'),
- ('strided_slice', 'strided_slice_data'),
- ('strided_slice_data', 'strided_slice_2'),
- ('strided_slice_input_1_data', 'strided_slice_2'),
- ('strided_slice_input_2_data', 'strided_slice_2'),
- ('strided_slice_input_3_data', 'strided_slice_2'),
- ('strided_slice_2', 'strided_slice_2_data'),
- ('strided_slice_2_data', 'output_op'),
- ],
- {},
- nodes_with_edges_only=True
- )
-
- UselessStridedSliceEraser().find_and_replace_pattern(graph)
- shape_inference(graph)
-
- graph_ref = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'output_op'),
- ],
- {'placeholder_data': {'shape': int64_array([4, 1, 6])}}
- )
- (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
- self.assertTrue(flag, resp)
-
- def test_negative(self):
- graph = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'strided_slice'),
- ('strided_slice_input_1_data', 'strided_slice'),
- ('strided_slice_input_2_data', 'strided_slice'),
- ('strided_slice_input_3_data', 'strided_slice'),
- ('strided_slice', 'strided_slice_data'),
- ('strided_slice_data', 'output_op'),
- ],
- {'strided_slice_data': {'value': []}},
- nodes_with_edges_only=True
- )
- graph_ref = graph.copy()
- UselessStridedSliceEraser().find_and_replace_pattern(graph)
- (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
- self.assertTrue(flag, resp)
import numpy as np
from extensions.middle.AnchorToPriorBox import AnchorToPriorBoxes
+from extensions.middle.SliceLikeToStridedSlice import SliceLikeToStridedSlice
from mo.graph.graph import Graph
from mo.middle.replacement import MiddleReplacementPattern
from mo.ops.const import Const
def run_after(self):
return [AnchorToPriorBoxes]
+ def run_before(self):
+ return [SliceLikeToStridedSlice]
+
def pattern(self):
return dict(
mandatory_props = {
'op': __class__.op,
'infer': __class__.infer,
- 'type': __class__.op,
+ 'type': None,
}
super().__init__(graph, mandatory_props, attrs)
mandatory_props = {
'op': __class__.op,
'type': 'Convert',
+ 'version': 'opset1',
'infer': __class__.infer,
'type_infer': __class__.type_infer,
'dst_type': None,
super().__init__(graph, {
'type': self.op,
'op': self.op,
+ 'version': 'opset1',
'in_ports_count': 3,
'out_ports_count': 1,
'infer': multi_box_detection_infer,
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'in_ports_count': 1,
'infer': Enter.enter_infer,
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': Exit.exit_infer,
'in_ports_count': 1,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'infer': __class__.infer,
'in_ports_count': 4,
'out_ports_count': 1,
mandatory_props = {
'op': __class__.op,
'type': __class__.op,
+ 'version': 'opset1',
'infer': __class__.infer,
'in_ports_count': 4,
'out_ports_count': 1,
+++ /dev/null
-"""
- Copyright (C) 2018-2020 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-import numpy as np
-
-from mo.graph.graph import Node, Graph
-from mo.ops.op import Op
-
-
-class LogOp(Op):
- op = 'Log'
-
- def __init__(self, graph: Graph, attrs: dict):
- mandatory_props = {
- 'type': __class__.op,
- 'op': __class__.op,
- 'infer': __class__.infer,
- 'in_ports_count': 1,
- 'out_ports_count': 1,
- }
- super().__init__(graph, mandatory_props, attrs)
-
- @staticmethod
- def infer(node: Node):
- assert len(node.in_nodes()) == 1
- assert len(node.out_nodes()) == 1
- input_node = node.in_node()
-
- node.out_node(0).shape = input_node.shape.copy()
- if input_node.has_valid('value'):
- node.out_node(0).value = np.array(np.log(input_node.value))
mandatory_props = {
'type': self.op,
'op': self.op,
+ 'version': 'opset1',
'transpose_a': False,
'transpose_b': False,
'infer': __class__.infer,
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'in_ports_count': 1,
'infer': NextIteration.enter_infer,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'infer': __class__.infer,
'in_ports_count': 4,
'out_ports_count': 1,
super().__init__(graph, {
'op': self.op,
'type': self.op_type,
+ 'version': 'opset1',
'infer': reduce_infer,
'keep_dims': 0,
'in_ports_count': 2,
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': TensorArray.array_infer,
}
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': TensorArrayGather.array_infer,
}
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': TensorArrayReader.array_infer,
}
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': TensorArrayScatter.array_infer,
}
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': TensorArraySize.array_infer,
}
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': TensorArrayWriter.array_infer,
}
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'extension',
'top_height': 0,
'top_width': 0,
'size_divisible_by': 0,
'type': self.op,
'op': self.op,
'operation': self.operation,
+ 'version': 'opset1',
'infer': self.infer,
'in_ports_count': 1,
'out_ports_count': 1,
'type': __class__.op,
'op': __class__.op,
'infer': ArgMaxOp.argmax_infer,
+ 'output_type': np.int64,
'in_ports_count': 2,
'out_ports_count': 1,
}
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': None
}, attrs)
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'in_ports_count': 5,
'out_ports_count': 1,
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'coord_start': 2,
'force_suppress': False,
'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'extension',
'type_infer': self.type_infer,
'infer': self.infer,
'in_ports_count': 2,
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'input_as_shape': 1,
'in_ports_count': 1,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': None
}, attrs)
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'extension',
'in_ports_count': 1,
'out_ports_count': 1,
'infer': CorrelationOp.corr_infer
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'in_ports_count': 2,
'out_ports_count': 1,
'infer': CTCGreedyDecoderOp.ctc_greedy_decoder_infer
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'extension',
'in_ports_count': 1,
'out_ports_count': 1,
'infer': DataAugmentationOp.data_augmentation_infer
mandatory_props = {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'mode': 'blocks_first',
mandatory_props = dict(
type=__class__.op,
op=__class__.op,
+ version='experimental',
infer=__class__.infer,
type_infer=self.type_infer,
in_ports_count=4,
super().__init__(graph, {
'op': self.op,
'type': self.op_type,
+ 'version': 'opset1',
'infer': lambda node: eltwise_infer(node, self.operation),
'type_infer': self.type_infer,
'can_be_bias': True,
enabled = False
op = 'Round'
op_type = None
+ version = 'extension'
operation = staticmethod(lambda a: np.round(a))
+++ /dev/null
-"""
- Copyright (C) 2018-2020 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import numpy as np
-
-from mo.graph.graph import Node, Graph
-from mo.ops.op import Op
-
-
-class ExpOp(Op):
- op = 'Exp'
-
- def __init__(self, graph: Graph, attrs: dict):
- mandatory_props = {
- 'type': __class__.op,
- 'op': __class__.op,
- 'infer': __class__.infer,
- 'in_ports_count': 1,
- 'out_ports_count': 1,
- }
- super().__init__(graph, mandatory_props, attrs)
-
- @staticmethod
- def infer(node: Node):
- assert len(node.in_nodes()) == 1
- assert len(node.out_nodes()) == 1
- input_node = node.in_node()
- assert input_node.has_valid('shape')
- node.out_node().shape = input_node.shape.copy()
- if input_node.has_valid('value'):
- node.out_node().value = np.exp(input_node.value)
+++ /dev/null
-"""
- Copyright (C) 2018-2020 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import unittest
-
-import numpy as np
-
-from extensions.ops.exp import ExpOp
-from mo.graph.graph import Node
-from mo.utils.unittest.graph import build_graph
-
-nodes_attributes = {'node_1': {'op': 'Identity', 'kind': 'op'},
- 'exp': {'op': 'Exp', 'kind': 'op'},
- 'node_3': {'op': 'Identity', 'kind': 'op'},
- 'op_output': {'kind': 'op', 'op': 'Result'}
- }
-
-
-class TestExpOp(unittest.TestCase):
- def test_shape_only(self):
- graph = build_graph(nodes_attributes,
- [('node_1', 'exp'),
- ('exp', 'node_3'),
- ('node_3', 'op_output')
- ],
- {'node_3': {'shape': None},
- 'node_1': {'shape': np.array([1, 3, 10, 20])},
- })
-
- exp_node = Node(graph, 'exp')
- ExpOp.infer(exp_node)
- exp_shape = np.array([1, 3, 10, 20])
- res_shape = graph.node['node_3']['shape']
- for i in range(0, len(exp_shape)):
- self.assertEqual(exp_shape[i], res_shape[i])
-
- def test_shape_and_value(self):
- graph = build_graph(nodes_attributes,
- [('node_1', 'exp'),
- ('exp', 'node_3'),
- ('node_3', 'op_output')
- ],
- {
- 'node_3': {
- 'shape': None,
- 'value': None,
- },
- 'node_1': {
- 'shape': np.array([2]),
- 'value': np.array([0, 1], dtype=np.float32),
- },
- })
-
- exp_node = Node(graph, 'exp')
- ExpOp.infer(exp_node)
- exp_shape = np.array([2])
- exp_value = np.array([1, 2.7182818], dtype=np.float32)
- res_shape = graph.node['node_3']['shape']
- res_value = graph.node['node_3']['value']
- for i in range(0, len(exp_shape)):
- self.assertEqual(exp_shape[i], res_shape[i])
- for i in range(0, len(exp_value)):
- self.assertAlmostEqual(exp_value[i], res_value[i], places=6)
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'levels': None,
'is_eltwise': True,
# flag to switch between dumping FakeQuantize as statistics and keeping it as layer in IR
super().__init__(graph, {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': self.infer,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'in_ports_count': 1,
'out_ports_count': 1,
'infer': copy_shape_infer
super().__init__(graph, {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': self.infer,
'in_ports_count': 3,
'out_ports_count': 1,
mandatory_props = {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'axes': None,
'mode': None,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'infer': __class__.infer,
'in_ports_count': 5,
'out_ports_count': 2,
'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset2',
'eps': None,
'across_channels': 0,
'normalize_variance': 1,
from mo.front.common.partial_infer.utils import int64_array
from mo.graph.graph import Node, Graph
+from mo.middle.passes.convert_data_type import np_data_type_to_destination_type
from mo.ops.op import Op
+from mo.utils.error import Error
class NonMaxSuppression(Op):
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset3',
'infer': __class__.infer,
+ 'output_type': np.int64,
'center_point_box': 0,
'box_encoding': 'corner',
'in_ports_count': 5,
def supported_attrs(self):
if self.ir_version < 10:
- return [
- 'center_point_box',
- ]
+ return ['center_point_box']
else:
- return [
- 'sort_result_descending',
- 'box_encoding'
- ]
-
+ version = self.get_opset()
+ if version == 'opset3':
+ return ['sort_result_descending', 'box_encoding',
+ ('output_type', lambda node: np_data_type_to_destination_type(node.output_type))]
+ elif version == 'opset1':
+ return ['sort_result_descending', 'box_encoding']
+ else:
+ raise Error('Unsupported operation opset version "{}"'.format(version))
@staticmethod
def infer(node: Node):
@staticmethod
def type_infer(node):
- node.out_port(0).set_data_type(np.int64 if node.graph.graph['cmd_params'].generate_experimental_IR_V10 else
- np.int32)
+ if not node.graph.graph['cmd_params'].generate_experimental_IR_V10:
+ node.out_port(0).set_data_type(np.int32)
+ else:
+ if node.get_opset() == 'opset3':
+ node.out_port(0).set_data_type(node.output_type)
+ else:
+ node.out_port(0).set_data_type(np.int64)
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+
+from extensions.ops.non_max_suppression import NonMaxSuppression
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph, regular_op_with_shaped_data, valued_const_with_data, result, \
+ connect, FakeAttr
+
+
+class TestNonMaxSuppressionInfer(unittest.TestCase):
+ def setUp(self):
+ nodes = {
+ **regular_op_with_shaped_data('boxes', [10, 100, 4], {'type': 'Parameter'}),
+ **regular_op_with_shaped_data('scores', [10, 5, 100], {'type': 'Parameter'}),
+ **valued_const_with_data('max_output_per_class', int64_array(10)),
+ **regular_op_with_shaped_data('nms', None, {'op': 'NonMaxSuppression', 'type': 'NonMaxSuppression',
+ 'name': 'nms'}),
+ **result('output'),
+ }
+
+ self.graph = build_graph(nodes, [
+ *connect('boxes', '0:nms'),
+ *connect('scores', '1:nms'),
+ *connect('max_output_per_class', '2:nms'),
+ *connect('nms', 'output'),
+ ], nodes_with_edges_only=True)
+
+ def test_nms_infer_v7(self):
+ self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=False, ir_version=7)
+
+ nms_node = Node(self.graph, 'nms')
+ nms_node['version'] = 'opset1'
+ NonMaxSuppression.infer(nms_node)
+ NonMaxSuppression.type_infer(nms_node)
+
+ self.assertTrue(np.array_equal(nms_node.out_port(0).data.get_shape(), [100, 3]))
+ self.assertTrue(nms_node.out_port(0).get_data_type() == np.int32)
+
+ def test_nms_infer_v10_opset1(self):
+ self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=True, ir_version=10)
+
+ nms_node = Node(self.graph, 'nms')
+ nms_node['version'] = 'opset1'
+ NonMaxSuppression.infer(nms_node)
+ NonMaxSuppression.type_infer(nms_node)
+
+ self.assertTrue(np.array_equal(nms_node.out_port(0).data.get_shape(), [100, 3]))
+ self.assertTrue(nms_node.out_port(0).get_data_type() == np.int64)
+
+ def test_nms_infer_v10_i64_opset3(self):
+ self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=True, ir_version=10)
+
+ nms_node = Node(self.graph, 'nms')
+ nms_node['version'] = 'opset3'
+ nms_node['output_type'] = np.int64
+ NonMaxSuppression.infer(nms_node)
+ NonMaxSuppression.type_infer(nms_node)
+
+ self.assertTrue(np.array_equal(nms_node.out_port(0).data.get_shape(), [100, 3]))
+ self.assertTrue(nms_node.out_port(0).get_data_type() == np.int64)
+
+ def test_nms_infer_v10_i32_opset3(self):
+ self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=True, ir_version=10)
+
+ nms_node = Node(self.graph, 'nms')
+ nms_node['version'] = 'opset3'
+ nms_node['output_type'] = np.int32
+ NonMaxSuppression.infer(nms_node)
+ NonMaxSuppression.type_infer(nms_node)
+
+ self.assertTrue(np.array_equal(nms_node.out_port(0).data.get_shape(), [100, 3]))
+ self.assertTrue(nms_node.out_port(0).get_data_type() == np.int32)
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
'eps': None,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'eps': None,
'p': None,
'eps_mode': None,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'axis': -1,
'infer': __class__.infer,
'on_value': None,
mandatory_props = {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': self.infer,
'is_input': True,
super().__init__(graph, {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'extension',
'in_ports_count': 1,
'out_ports_count': 1,
'infer': copy_shape_infer
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'extension',
'in_ports_count': 1,
'out_ports_count': 1,
'infer': PredictionHeatmapOp.infer
super().__init__(graph, {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': self.infer,
mandatory_props = {
'type': self.op,
'op': self.op,
+ 'version': 'opset1',
'flip': 1,
'max_size': np.array([]),
'min_size': np.array([]),
mandatory_props = {
'type': self.op,
'op': self.op,
+ 'version': 'opset1',
'in_ports_count': 2,
'out_ports_count': 1,
'infer': self.priorbox_clustered_infer,
mandatory_props = dict(
type=__class__.op,
op=__class__.op,
+ version='experimental',
infer=__class__.infer,
)
super().__init__(graph, mandatory_props, attrs)
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'post_nms_topn': 300, # default in caffe-shared
'infer': ProposalOp.proposal_infer,
'in_ports_count': 3,
mandatory_props = dict(
type=__class__.op,
op=__class__.op,
+ version='experimental',
infer=__class__.infer
)
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset2',
'mode': 'average',
'in_ports_count': 2,
'out_ports_count': 1,
updated_attrs = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'mode': 'bilinear_deformable',
'in_ports_count': 3,
'trans_std': 0,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'in_ports_count': 3,
'out_ports_count': 1,
'infer': __class__.infer,
See the License for the specific language governing permissions and
limitations under the License.
"""
+import numpy as np
from mo.graph.graph import Graph
from mo.ops.op import Op
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'op': __class__.op,
'type': None,
+ 'op': self.op,
+
+ 'output_type': np.int64,
'infer': None,
+
'in_ports_count': 1,
'out_ports_count': 1,
}
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'in_ports_count': 1,
'out_ports_count': 1,
'infer': RegionYoloOp.regionyolo_infer
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset2',
'infer': ReorgYoloOp.reorgyolo_infer
}
super().__init__(graph, mandatory_props, attrs)
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
'type': __class__.op,
+ 'version': 'opset1',
'seq_axis': None,
'batch_axis': 0,
'op': __class__.op,
mandatory_props = dict(
type=__class__.op,
op=__class__.op,
+ version='experimental',
infer=__class__.infer,
in_ports_count=5,
out_ports_count=2,
mandatory_props = {
'op': __class__.op,
'type': __class__.op,
+ 'version': 'opset1',
'in_ports_count': 3,
'out_ports_count': 1,
'infer': __class__.infer,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'in_ports_count': 3,
'out_ports_count': 1,
'infer': SimplerNMSOp.simplernms_infer
limitations under the License.
"""
-import networkx as nx
import numpy as np
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
from mo.ops.op import Op
class Size(Op):
op = 'Size'
- def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+ def __init__(self, graph: Graph, attrs: dict):
+ assert 'output_type' in attrs, 'Size has mandatory `output_type` attribute'
+
mandatory_props = {
- 'op': __class__.op,
- 'infer': __class__.infer,
+ 'type': None,
+ 'op': self.op,
+
+ 'output_type': np.int64,
+ 'infer': self.infer,
}
super().__init__(graph, mandatory_props, attrs)
@staticmethod
def infer(node: Node):
- size = np.prod(node.in_node().shape)
- value = np.array(size, dtype=np.int)
- node.out_node().shape = np.array(value.shape, dtype=np.int64)
+ name = node.soft_get('name', node.id)
+ connected_in_ports = [port for port in node.in_ports().values() if not port.disconnected()]
+ assert len(connected_in_ports) == 1, \
+ 'Size operation should have exact one input node, but it has {}'.format(len(connected_in_ports))
+
+ input_shape = node.in_port(0).data.get_shape()
+ assert input_shape is not None, \
+ 'Input shape is undefined for Size node `{}`'.format(node.soft_get('name', node.id))
+
+ assert node.has_valid('output_type'), \
+ '`output_type` attribute is not set for Size node `{}`'.format(name)
+ assert node.output_type in [np.int64, np.int32], \
+ 'Size `output_type` attribute must be int32 or int64, `{}` found'.format(np.dtype(node.output_type).name)
+
+ node.out_port(0).data.set_value(np.array(np.prod(input_shape), dtype=node.output_type))
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from mo.front.caffe.extractors.utils import get_canonical_axis_index
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Graph
+from mo.ops.op import Op
+
+
+class SliceLike(Op):
+    """
+    Shape-inference-only operation that crops input 0 so that, along the axes
+    listed in the `axes` attribute, its dimensions match those of input 1
+    (presumably the framework's `slice_like` op — confirm against the extractor
+    that creates this node).  `type` is None, so the node must be replaced by a
+    supported sub-graph before IR emission.
+    """
+    op = 'slice_like'
+    enabled = True
+
+    def __init__(self, graph: Graph, attrs: dict):
+        # `axes` must be present even when explicitly None (None means "all axes").
+        assert 'axes' in attrs, 'Please set mandatory `axes` attribute for `slice_like` operation'
+        super().__init__(graph, {
+            'type': None,
+            'op': self.op,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
+            'infer': self.infer,
+        }, attrs)
+
+    @staticmethod
+    def infer(node):
+        # Output shape starts as a copy of input 0's shape; the dims at the
+        # selected axes are then overridden with the matching dims of input 1.
+        input_shape = node.in_port(0).data.get_shape()
+        shape_like = node.in_port(1).data.get_shape()
+
+        new_shape = np.copy(input_shape)
+        if node.axes is not None:
+            # Canonicalize negative indices against input 0's rank; keep sorted.
+            node.axes = sorted([get_canonical_axis_index(input_shape, i) for i in node.axes])
+            for i in node.axes:
+                new_shape[i] = shape_like[i]
+        else:
+            # axes=None: take the entire shape from input 1; ranks must agree.
+            assert input_shape.size == shape_like.size,\
+                'Input shape ranks are inconsistent: {} and {}'.format(input_shape.size, shape_like.size)
+            node.axes = int64_array(range(shape_like.size))
+            new_shape = np.copy(shape_like)
+
+        node.out_port(0).data.set_shape(new_shape)
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+
+import numpy as np
+
+from extensions.ops.slice_like import SliceLike
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
+
+# Test fixture: a two-input `slice_like` node. Input 0 has shape [3, 4], the
+# shape-like input has shape [2, 3]; the output shape is left undefined and is
+# filled in by SliceLike.infer in each test below.
+nodes_attributes = {
+    'input_data': {'kind': 'data', 'shape': int64_array([3, 4]), 'value': None},
+    'shape_like_data': {'kind': 'data', 'shape': int64_array([2, 3]), 'value': None},
+    # NOTE(review): 'op' reads 'slice_data' — looks like a typo for 'slice_like';
+    # harmless here because the tests invoke SliceLike.infer directly.
+    'slice_like': {'kind': 'op', 'op': 'slice_data'},
+    'out_data': {'kind': 'data', 'shape': None, 'value': None}
+}
+
+# Graph topology: both data nodes feed `slice_like`, which produces `out_data`.
+edges = [
+    ('input_data', 'slice_like', {'in': 0}),
+    ('shape_like_data', 'slice_like', {'in': 1}),
+    ('slice_like', 'out_data')
+]
+
+
+class SliceLikeTest(unittest.TestCase):
+    """Shape-inference tests for SliceLike.infer over the fixture graph above."""
+
+    def test_1(self):
+        # axes=None: output shape is copied entirely from the shape-like input.
+        graph = build_graph(nodes_attributes, edges, {'slice_like': {'axes': None}})
+        slice_like = Node(graph, 'slice_like')
+        SliceLike.infer(slice_like)
+        ref_shape = int64_array([2, 3])
+        res_shape = graph.node['out_data']['shape']
+        self.assertTrue(np.array_equal(res_shape, ref_shape))
+
+    def test_2(self):
+        # All axes listed explicitly: same result as copying the whole shape.
+        graph = build_graph(nodes_attributes, edges, {'slice_like': {'axes': (0, 1)}})
+        slice_like = Node(graph, 'slice_like')
+        SliceLike.infer(slice_like)
+        ref_shape = int64_array([2, 3])
+        res_shape = graph.node['out_data']['shape']
+        self.assertTrue(np.array_equal(res_shape, ref_shape))
+
+    def test_3(self):
+        # Only axis 0 is sliced: dim 0 comes from shape-like (2), dim 1 stays 4.
+        graph = build_graph(nodes_attributes, edges, {'slice_like': {'axes': (0,)}})
+        slice_like = Node(graph, 'slice_like')
+        SliceLike.infer(slice_like)
+        ref_shape = int64_array([2, 4])
+        res_shape = graph.node['out_data']['shape']
+        self.assertTrue(np.array_equal(res_shape, ref_shape))
+
+    def test_4(self):
+        # Negative axis: -1 canonicalizes to axis 1, so dim 1 becomes 3.
+        graph = build_graph(nodes_attributes, edges, {'slice_like': {'axes': (-1,)}})
+        slice_like = Node(graph, 'slice_like')
+        SliceLike.infer(slice_like)
+        ref_shape = int64_array([3, 3])
+        res_shape = graph.node['out_data']['shape']
+        self.assertTrue(np.array_equal(res_shape, ref_shape))
mandatory_props = {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'mode': 'blocks_first',
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'infer': __class__.infer,
'in_ports_count': 4,
'out_ports_count': 3
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'kind': 'op',
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': self.infer,
'in_ports_count': 3,
def infer(node: Node):
input_indices_shape = node.in_port(0).data.get_shape()
input_shape_value = node.in_port(1).data.get_value()
- input_shape_shape = node.in_port(1).data.get_shape()
new_shape_value = node.in_port(2).data.get_value()
new_shape_shape = node.in_port(2).data.get_shape()
output_indices_shape = np.concatenate((input_indices_shape[0:1], new_shape_shape))
node.out_port(0).data.set_shape(output_indices_shape)
- #TODO: implement for constant input indices value
+ # TODO: implement for constant input indices value
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'infer': __class__.infer,
'in_ports_count': 3,
'out_ports_count': 1,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'infer': __class__.infer,
'in_ports_count': 3,
'out_ports_count': 1,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'infer': __class__.infer,
'in_ports_count': 3,
'out_ports_count': 1,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'type_infer': self.type_infer,
'infer': self.infer,
'in_ports_count': 4,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'reduce_op': None,
'type_infer': self.type_infer,
'infer': self.infer,
'in_ports_count': 6,
'out_ports_count': 1,
- 'version': 'experimental',
}, attrs)
def supported_attrs(self):
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'in_ports_count': 1,
'out_ports_count': 1,
'infer': SpatialTransformOp.sp_infer
super().__init__(graph, {
'op': self.op,
'type': 'VariadicSplit',
+ 'version': 'opset1',
'infer': self.infer,
super().__init__(graph, {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': self.infer,
super().__init__(graph, {
'op': self.op,
'type': 'Split',
+ 'version': 'opset1',
'axis': 1,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'identity': True,
'in_ports_count': 1,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'input_port_map': [], # a list of dicts with such attrs as external_port_id, etc.
'output_port_map': [], # a list of dicts with such attrs as external_port_id, etc.
'back_edges': [], # a list of dicts with such attrs as from_layer, from_port, etc.
import numpy as np
from mo.graph.graph import Graph
+from mo.middle.passes.convert_data_type import np_data_type_to_destination_type
from mo.ops.op import Op, PermuteAttrs
from mo.utils.error import Error
super().__init__(graph, {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset3',
'infer': self.infer,
'type_infer': self.type_infer,
+
+ 'index_element_type': np.int32,
'axis': None,
'mode': 'max',
'sort': 'none',
}, attrs)
def backend_attrs(self):
- return ['axis', 'mode', 'sort']
+ version = self.get_opset()
+ if version == 'opset3':
+ return ['axis', 'mode', 'sort',
+ ('index_element_type', lambda node: np_data_type_to_destination_type(node.index_element_type))]
+ elif version == 'opset1':
+ return ['axis', 'mode', 'sort']
+ else:
+ raise Error('Unknown opset version "{}"'.format(version))
@staticmethod
def infer(node):
@staticmethod
def type_infer(node):
node.out_port(0).set_data_type(node.in_port(0).get_data_type())
- node.out_port(1).set_data_type(np.int32)
+ if node.get_opset() == 'opset3':
+ node.out_port(1).set_data_type(node.index_element_type)
+ else:
+ node.out_port(1).set_data_type(np.int32)
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+
+from extensions.ops.topk import TopK
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph, regular_op_with_shaped_data, valued_const_with_data, result, \
+ connect, FakeAttr
+
+
+class TestTopKInfer(unittest.TestCase):
+    """Shape and data-type inference tests for TopK across IR versions/opsets."""
+
+    def setUp(self):
+        # float32 input [20, 100, 4], k=10, axis=1; TopK has two outputs
+        # (values and indices), each routed to its own Result node.
+        nodes = {
+            **regular_op_with_shaped_data('data', [20, 100, 4], {'type': 'Parameter', 'value': None,
+                                                                 '_out_port_data_type': {0: np.float32}}),
+            **valued_const_with_data('k', int64_array(10)),
+            **regular_op_with_shaped_data('topk', None, {'op': 'TopK', 'type': 'TopK', 'name': 'topk', 'axis': 1}),
+            'topk_d2': {'kind': 'data', 'shape': None, 'value': None},
+            **result('output_1'),
+            **result('output_2'),
+        }
+
+        self.graph = build_graph(nodes, [
+            *connect('data', '0:topk'),
+            *connect('k', '1:topk'),
+            ('topk', 'topk_d', {'out': 0}),
+            ('topk', 'topk_d2', {'out': 1}),
+            ('topk_d', 'output_1'),
+            ('topk_d2', 'output_2'),
+        ], nodes_with_edges_only=True)
+
+    def test_topk_infer_v7(self):
+        # Legacy IR v7 with opset1: indices output must default to int32.
+        self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=False, ir_version=7)
+
+        topk_node = Node(self.graph, 'topk')
+        topk_node['version'] = 'opset1'
+        TopK.infer(topk_node)
+        TopK.type_infer(topk_node)
+
+        self.assertTrue(np.array_equal(topk_node.out_port(0).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(np.array_equal(topk_node.out_port(1).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(topk_node.out_port(0).get_data_type() == np.float32)
+        self.assertTrue(topk_node.out_port(1).get_data_type() == np.int32)
+
+    def test_topk_infer_v10_opset1(self):
+        # IR v10, opset1: still int32 indices (index_element_type is opset3-only).
+        self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=True, ir_version=10)
+
+        topk_node = Node(self.graph, 'topk')
+        topk_node['version'] = 'opset1'
+        TopK.infer(topk_node)
+        TopK.type_infer(topk_node)
+
+        self.assertTrue(np.array_equal(topk_node.out_port(0).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(np.array_equal(topk_node.out_port(1).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(topk_node.out_port(0).get_data_type() == np.float32)
+        self.assertTrue(topk_node.out_port(1).get_data_type() == np.int32)
+
+    def test_topk_infer_v10_i64_opset3(self):
+        # IR v10, opset3: indices dtype follows the index_element_type attribute (int64).
+        self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=True, ir_version=10)
+
+        topk_node = Node(self.graph, 'topk')
+        topk_node['version'] = 'opset3'
+        topk_node['index_element_type'] = np.int64
+        TopK.infer(topk_node)
+        TopK.type_infer(topk_node)
+
+        self.assertTrue(np.array_equal(topk_node.out_port(0).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(np.array_equal(topk_node.out_port(1).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(topk_node.out_port(0).get_data_type() == np.float32)
+        self.assertTrue(topk_node.out_port(1).get_data_type() == np.int64)
+
+    def test_topk_infer_v10_i32_opset3(self):
+        # IR v10, opset3: index_element_type=int32 is honoured as well.
+        self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=True, ir_version=10)
+
+        topk_node = Node(self.graph, 'topk')
+        topk_node['version'] = 'opset3'
+        topk_node['index_element_type'] = np.int32
+        TopK.infer(topk_node)
+        TopK.type_infer(topk_node)
+
+        self.assertTrue(np.array_equal(topk_node.out_port(0).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(np.array_equal(topk_node.out_port(1).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(topk_node.out_port(0).get_data_type() == np.float32)
+        self.assertTrue(topk_node.out_port(1).get_data_type() == np.int32)
mandatory_props = dict(
type=__class__.op,
op=__class__.op,
+ version='experimental',
infer=__class__.infer
)
super().__init__(graph, mandatory_props, attrs)
super().__init__(graph, {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'infer': self.infer,
'force_precision_in_ports': {1: 'int64'},
'in_ports_count': 2,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'infer': __class__.infer,
'in_ports_count': 1,
'out_ports_count': 3
raise Error(
'Unexpected exception happened during extracting attributes for node {}.' +
'\nOriginal exception message: {}',
- new_attrs['name'] if 'name' in new_attrs else '<UNKNOWN>',
+ node,
str(e)
) from e
if supported:
attrs = self.graph.node[self.node]
if not k in attrs:
raise AttributeError("Attribute {} missing in {} node".format(k, self.name))
+ if k == 'version' and attrs.get(k, v) != v:
+ raise AttributeError("Attribute 'version' cannot be updated in {} node".format(self.name))
+
attrs[k] = v
def __getattr__(self, k):
return self.graph.node[self.node][k]
def __setitem__(self, k, v):
+ if k == 'version' and self.graph.node[self.node].get(k, v) != v:
+ raise AttributeError("Attribute 'version' cannot be updated in {} node".format(self.name))
self.graph.node[self.node][k] = v
def __contains__(self, k):
if idx not in self._out_ports:
self.add_output_port(idx=idx)
+ def get_opset(self):
+ """
+ Gets the operation set version where the operation was introduced.
+ If the version is not defined then consider it an extension
+ :return: the string with the opset name
+ """
+ return self.soft_get('version', 'extension')
+
class Graph(nx.MultiDiGraph):
def __init__(self, data=None, **attr):
from extensions.back.SpecialNodesFinalization import RemoveConstOps, CreateConstNodesReplacement, RemoveOutputOps, \
NormalizeTI
+from mo.utils.get_ov_update_message import get_ov_update_message
from mo.graph.graph import Graph
from mo.middle.pattern_match import for_graph_and_each_sub_graph_recursively, for_each_sub_graph_recursively
from mo.pipeline.common import prepare_emit_ir, get_ir_version
if argv.generate_deprecated_IR_V7:
from mo.middle.passes.convert_data_type import SUPPORTED_DATA_TYPES
SUPPORTED_DATA_TYPES['bool'] = (np.bool, 'I32', 'boolean')
- return driver(argv)
+
+ ov_update_message = None
+ if not hasattr(argv, 'silent') or not argv.silent:
+ ov_update_message = get_ov_update_message()
+ ret_code = driver(argv)
+ if ov_update_message:
+ print(ov_update_message)
+ return ret_code
except (FileNotFoundError, NotADirectoryError) as e:
log.error('File {} was not found'.format(str(e).split('No such file or directory:')[1]))
log.debug(traceback.format_exc())
try:
if node.value.dtype in [np.float32, np.float64, np.float16] and not node.has_and_set('correct_data_type'):
convert_node_blobs(graph, node, data_type_str_to_np(data_type_str))
- # convert all I64 to I32 since plugins don't support I64:
- if node.value.dtype == np.int64:
- convert_node_blobs(graph, node, np.int32)
except Exception as e:
raise Error('Coudn\'t convert blob {}, details: {}', node.soft_get('name'), e) from e
limitations under the License.
"""
-import numpy as np
-
from mo.graph.graph import Node, Graph
from mo.graph.perm_inputs import PermuteInputs
from mo.ops.op import Op
+from mo.utils.broadcasting import bi_directional_shape_broadcasting, uni_directional_shape_broadcasting, \
+ uni_directional_broadcasting, bi_directional_broadcasting
+from mo.utils.error import Error
class Broadcast(Op):
Inputs:
[0] - tensor to be broadcasted
[1] - shape to be broadcast to
- [2] - optional axis paramater that which axis are allowed to be broadcasted
+ [2] - optional axis parameter that which axis are allowed to be broadcasted
"""
op = 'Broadcast'
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset3',
+ 'mode': 'numpy',
'in_ports_count': 3,
'out_ports_count': 1,
'force_precision_in_ports':
'infer': __class__.infer,
}, attrs)
+ def supported_attrs(self):
+ return ['mode']
+
@staticmethod
def infer(node: Node):
- # TODO Add necessary checks and asserts
- b_value = node.in_port(0).data.get_value()
- b_shape = node.in_port(1).data.get_value()
- assert b_shape is not None
- node.out_port(0).data.set_shape(b_shape)
+ node_name = node.soft_get('name', node.id)
- PermuteInputs().set_input_permutation(node.in_node(1), node, 'output:0', 'shape')
+ input_shape = node.in_port(0).data.get_shape()
+ input_value = node.in_port(0).data.get_value()
+ target_shape = node.in_port(1).data.get_value()
+ assert target_shape is not None, 'Output shape is not defined for node "{}"'.format(node_name)
+ assert node.has_and_set('mode'), 'Broadcasting mode is not defined for node "{}"'.format(node_name)
- if b_value is not None and not node.has_and_set('stop_value_propagation'):
- new_value = np.broadcast_to(b_value, b_shape)
- node.out_port(0).data.set_value(new_value)
+ if node.mode == 'numpy':
+ node.out_port(0).data.set_shape(uni_directional_shape_broadcasting(input_shape, target_shape))
+ elif node.mode == 'bidirectional':
+ node.out_port(0).data.set_shape(bi_directional_shape_broadcasting(input_shape, target_shape))
+ else:
+ raise Error('The node "{}" has unsupported mode "{}"'.format(node_name, node.mode))
+
+ PermuteInputs().set_input_permutation(node.in_node(1), node, 'output:0', 'shape')
+ if input_value is not None and not node.has_and_set('stop_value_propagation'):
+ if node.mode == 'numpy':
+ node.out_port(0).data.set_value(uni_directional_broadcasting(input_value, target_shape))
+ elif node.mode == 'bidirectional':
+ node.out_port(0).data.set_value(bi_directional_broadcasting(input_value, target_shape))
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'infer': copy_shape_infer,
'in_ports_count': 1,
'out_ports_count': 1,
super().__init__(graph, {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'axis': 1,
'infer': concat_infer,
'out_ports_count': 1,
super().__init__(graph, {
'type': self.op,
'op': self.op,
+ 'version': 'opset1',
'infer': self.infer,
'value': None,
'shape': None,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'infer': __class__.infer,
'multiplication_transparent': True,
'multiplication_transparent_ports': [(0, 0), (1, 0)],
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
'infer': __class__.infer,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'infer': __class__.infer,
'in_ports_count': 3,
'out_ports_count': 1,
node.graph.remove_edge(node.in_node(2).id, node.id)
node['shape_input'] = False
else:
- node['force_precision_in_ports'] = {2: 'int64'}
\ No newline at end of file
+ node['force_precision_in_ports'] = {2: 'int64'}
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'infer': Convolution.infer,
'multiplication_transparent': True,
'multiplication_transparent_ports': [(0, 0), (2, 0)],
super().__init__(graph, {
'type': self.op,
'op': self.op,
+ 'version': 'opset1',
'infer': self.infer,
super().__init__(graph, {
'op': self.op,
'type': 'Norm',
+ 'version': 'opset1',
'bias': 1,
'region': 'across',
node[k] = v
node.update_node()
+ def get_opset(self):
+ """
+ Gets the operation set version where the operation was introduced.
+ If the version is not defined then consider it an extension
+ :return: the string with the opset name
+ """
+ return self.attrs.get('version', 'extension')
+
+
@classmethod
def update_node_stat(cls, node: Node, attrs: dict = None):
if attrs is None:
super().__init__(graph, {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': __class__.infer,
'in_ports_count': 4,
'out_ports_count': 1,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'infer': __class__.infer,
'in_ports_count': 1,
'out_ports_count': 1,
super().__init__(graph, {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': self.infer,
super().__init__(graph, {
'op': __class__.op,
'type': __class__.op,
+ 'version': 'opset1',
'infer': lambda x: None,
'value': None,
'data_type': None,
def __init__(self, graph, attrs: dict):
super().__init__(graph, {
+ 'type': __class__.op,
+ 'op': __class__.op,
+ 'version': 'opset2',
'pooled_h': None,
'pooled_w': None,
'spatial_scale': 0.0625,
- 'type': __class__.op,
- 'op': __class__.op,
'infer': roipooling_infer,
'in_ports_count': 2,
'out_ports_count': 1,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
'infer': copy_shape_infer,
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
'in_ports_count': 3,
limitations under the License.
"""
-import logging as log
-
import numpy as np
from mo.graph.graph import Graph
+from mo.middle.passes.convert_data_type import np_data_type_to_destination_type
from mo.ops.op import Op
+from mo.utils.error import Error
class Shape(Op):
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'type': __class__.op,
- 'op': __class__.op,
- 'infer': __class__.infer,
+ 'type': self.op,
+ 'op': self.op,
+ 'version': 'opset3',
+
+ 'output_type': np.int64,
+ 'infer': self.infer,
'type_infer': self.type_infer,
+
'in_ports_count': 1,
'out_ports_count': 1,
}, attrs)
- def supported_attrs(self):
- return []
+ def backend_attrs(self):
+ version = self.get_opset()
+ if version == 'opset3':
+ return [
+ ('output_type', lambda node: np_data_type_to_destination_type(node.output_type)),
+ ]
+ elif version == 'opset1':
+ return []
+ else:
+ raise Error('Unknown opset version "{}"'.format(version))
+
@staticmethod
def infer(node):
- if len(node.in_nodes()) != 1:
- log.warning('ShapeOf operation should have exact one input node, but it has {}'.format(len(node.in_nodes())))
- return
-
- if node.in_node(0).shape is not None:
- value = np.array(node.in_node(0).shape)
- node.out_node().shape = np.array(value.shape, dtype=np.int64)
-
- if not node.has_and_set('stop_value_propagation'):
- if node.has_valid('data_type'):
- node.out_node().value = np.array(value, dtype=node.data_type)
- else:
- node.out_node().value = np.array(value)
- node.out_node().shape = np.array(value.shape, dtype=np.int64)
+ name = node.soft_get('name', node.id)
+ connected_in_ports = [port for port in node.in_ports().values() if not port.disconnected()]
+ assert len(connected_in_ports) == 1, \
+ 'ShapeOf operation should have exact one input node, but it has {}'.format(len(connected_in_ports))
+
+ input_shape = node.in_port(0).data.get_shape()
+ assert input_shape is not None, \
+ 'Input shape is undefined for ShapeOf node `{}`'.format(node.soft_get('name', node.id))
+
+ assert node.has_valid('output_type'), \
+ '`output_type` attribute is not set for ShapeOf node `{}`'.format(name)
+ assert node.output_type in [np.int64, np.int32], \
+ 'ShapeOf `output_type` attribute must be int32 or int64, `{}` found'.format(np.dtype(node.output_type).name)
+
+ if node.has_and_set('stop_value_propagation'):
+ node.out_port(0).data.set_shape(input_shape.shape)
else:
- log.info('Can\'t infer shape and value for shape operation due to undefined input shape')
+ node.out_port(0).data.set_value(np.array(input_shape, dtype=node.output_type))
@staticmethod
def type_infer(node):
- node.out_port(0).set_data_type(np.int64 if node.graph.graph['cmd_params'].generate_experimental_IR_V10 else
- np.int32)
+ node.out_port(0).set_data_type(node.output_type)
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'infer': Softmax.infer,
- 'kind': 'op',
- 'axis': 1,
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
+ 'infer': Softmax.infer,
+ 'axis': 1,
'in_ports_count': 1,
'out_ports_count': 1,
}, attrs)
PermuteAttrs.create_permute_attrs(node, attrs=[('axis', 'input:0')])
+class SoftmaxONNX(Op):
+ op = 'SoftMaxONNX'
+ enabled = False
+
+ def __init__(self, graph: Graph, attrs: dict):
+ super().__init__(graph, {
+ 'infer': None,
+ 'axis': 1,
+ 'type': None, # this operation will be replaced with a
+ # Reshape(Softmax(Flatten(x, axis), -1), x.shape) sub-graph
+ 'op': __class__.op,
+ 'in_ports_count': 1,
+ 'out_ports_count': 1,
+ }, attrs)
+
+
class LogSoftmax(Op):
op = 'LogSoftmax'
enabled = False
def __init__(self, graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'op': self.op,
'type': self.op,
'in_ports_count': 3,
def __init__(self, graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'op': __class__.op,
'type': __class__.op,
+ 'version': 'opset1',
'squeeze_dims': None,
'reinterp_shape': True,
'keep_at_least_1d': 0,
super().__init__(graph, {
'type': __class__.op,
'op': 'StridedSlice',
+ 'version': 'opset1',
'in_ports_count': 4,
'out_ports_count': 1,
'infer': __class__.infer
super().__init__(graph, {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': self.infer,
super().__init__(graph, {
'op': self.op,
'type': 'Tile',
+ 'version': 'opset1',
'infer': self.infer,
def __init__(self, graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'op': __class__.op,
'type': __class__.op,
+ 'version': 'opset1',
'unsqueeze_dims': None,
'reinterp_shape': True,
'in_ports_count': 2,
in_port.get_connection().insert_node(Cast(graph, {'dst_type': np_type}).create_node())
-def convert_outputs_of_specific_ops(graph: Graph):
- type_port = {'ShapeOf': {0: 'int32'},
- 'NonMaxSuppression': {0: 'int32'},
- }
-
- for node in graph.get_op_nodes():
- if node.soft_get('type') in type_port:
- ports_to_update = type_port[node.soft_get('type')]
- for port_id, precision in ports_to_update.items():
- if port_id in node.out_ports():
- log.debug('Insert Convert after op "{}" to type "{}"'.format(node.soft_get('name', node.id),
- precision))
- node.out_port(port_id).get_connection().insert_node(
- Cast(graph, {'dst_type': data_type_str_to_np(precision)}).create_node())
-
-
def prepare_emit_ir(graph: Graph, data_type: str, output_dir: str, output_model_name: str,
mean_data: [list, None] = None, input_names: list = None, meta_info: dict = None):
if input_names is None:
# restore data type for specific inputs/outputs of specific ops to the data types required by nGraph
if not graph.graph['cmd_params'].generate_deprecated_IR_V7:
for_graph_and_each_sub_graph_recursively(graph, convert_inputs_of_specific_ops)
- for_graph_and_each_sub_graph_recursively(graph, convert_outputs_of_specific_ops)
if graph.graph['cmd_params'].generate_experimental_IR_V10:
for_graph_and_each_sub_graph_recursively(graph, OpVersioning().find_and_replace_pattern)
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import logging as log
+import numpy as np
+
+from mo.front.common.partial_infer.utils import int64_array
+
+
+def make_equal_rank(shape_1: np.array, shape_2: np.array):
+ """
+    Prepend the shape with smaller length with 1s. Return the updated shapes
+ :param shape_1: first shape
+ :param shape_2: second shape
+ :return: tuple with updated shapes
+ """
+ while len(shape_1) < len(shape_2):
+ shape_1 = np.insert(shape_1, 0, 1)
+
+ while len(shape_2) < len(shape_1):
+ shape_2 = np.insert(shape_2, 0, 1)
+
+ return shape_1, shape_2
+
+
+def uni_directional_shape_broadcasting(input_shape: np.array, target_shape: np.array):
+ """
+ Uni-directional broadcasting of two shapes following the numpy semantic
+ :param input_shape: input shape to broadcast
+ :param target_shape: target shape
+ :return: broadcasted shape or None if broadcasting cannot be performed
+ """
+ input = input_shape.copy()
+
+    # in uni-directional broadcasting the target shape rank can be higher than or equal to the input shape rank
+ if len(input_shape) > len(target_shape):
+ log.debug('The shape "{}" cannot be broadcasted to "{}"'.format(input_shape, target_shape))
+ return None
+
+ # prepend input shape with 1s
+ input, target_shape = make_equal_rank(input, target_shape)
+
+ for left, right in zip(input, target_shape):
+ if left != right and left != 1:
+ log.debug('The shape "{}" cannot be broadcasted to "{}"'.format(input_shape, target_shape))
+ return None
+
+ return target_shape
+
+
+def bi_directional_shape_broadcasting(input_shape_1: np.array, input_shape_2: np.array):
+ """
+ Bi-directional broadcasting of two shapes following numpy semantic
+ :param input_shape_1: first shape to broadcast
+ :param input_shape_2: second shape to broadcast
+ :return: broadcasted shape or None if broadcasting cannot be performed
+ """
+ shape_1 = input_shape_1.copy()
+ shape_2 = input_shape_2.copy()
+ shape_1, shape_2 = make_equal_rank(shape_1, shape_2)
+
+ for left, right in zip(shape_1, shape_2):
+ if left != right and left != 1 and right != 1:
+ log.debug('The shape "{}" cannot be broadcasted to "{}"'.format(input_shape_1, input_shape_2))
+ return None
+
+ return np.maximum(shape_1, shape_2)
+
+
+def uni_directional_broadcasting(input_value: np.array, target_shape: np.array):
+ """
+ Uni-directional broadcasting of input tensor to target shape following the numpy semantic
+ :param input_value: input value to broadcast
+ :param target_shape: target shape
+ :return: broadcasted value
+ """
+ assert uni_directional_shape_broadcasting(int64_array(input_value.shape), target_shape) is not None, \
+ 'The tensor of shape "{}" cannot be uni-directionally broadcasted to shape "{}"'.format(input_value.shape,
+ target_shape)
+ return np.broadcast_to(input_value, target_shape)
+
+
+def bi_directional_broadcasting(input_value: np.array, second_shape: np.array):
+ """
+    Bi-directional broadcasting of the input tensor with the second shape following the numpy semantics
+ :param input_value: input value to broadcast
+ :param second_shape: second tensor shape
+ :return: broadcasted value
+ """
+ assert bi_directional_shape_broadcasting(int64_array(input_value.shape), second_shape) is not None, \
+ 'The tensor of shape "{}" cannot be bi-directionally broadcasted to shape "{}"'.format(input_value.shape,
+ second_shape)
+ return np.array(input_value * np.ones(second_shape), dtype=input_value.dtype)
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.broadcasting import bi_directional_broadcasting, bi_directional_shape_broadcasting, uni_directional_broadcasting, uni_directional_shape_broadcasting
+from generator import generator, generate
+import numpy as np
+import unittest
+
+
+@generator
+class TestingBroadcasting(unittest.TestCase):
+ @generate(*[([], [20, 30, 10], [20, 30, 10]),
+ ([1], [20, 30, 10], [20, 30, 10]),
+ ([1, 1, 10], [20, 30, 10], [20, 30, 10]),
+ ([20, 1, 10], [20, 30, 10], [20, 30, 10]),
+ ([20, 30, 10], [20, 30, 10], [20, 30, 10]),
+ ([20, 30, 10], [5, 7, 20, 30, 10], [5, 7, 20, 30, 10]),
+ ([1, 2], [20, 3, 10, 2], [20, 3, 10, 2]),
+ ([1, 1], [1], None),
+ ([5, 10], [1, 10], None),
+ ])
+ def test_uni_directional_broadcasting(self, input_shape, target_shape, expected_shape):
+ self.assertTrue(np.array_equal(uni_directional_shape_broadcasting(input_shape, target_shape), expected_shape))
+
+ input_value = np.array(np.random.rand(*input_shape))
+ if expected_shape is not None:
+ expected_value = np.broadcast_to(input_value, int64_array(target_shape))
+ self.assertTrue(np.array_equal(uni_directional_broadcasting(input_value, int64_array(target_shape)), expected_value))
+ else:
+ with self.assertRaisesRegex(Exception, '.*cannot be uni-directionally broadcasted.*'):
+ uni_directional_broadcasting(input_value, int64_array(target_shape))
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import datetime
+
+msg_fmt = 'It\'s been a while, check for a new version of ' + \
+ 'Intel(R) Distribution of OpenVINO(TM) toolkit here {0} or on the GitHub*'
+
+
+def get_ov_update_message():
+ expected_update_date = datetime.date(year=2020, month=10, day=1)
+ current_date = datetime.date.today()
+
+ link = 'https://software.intel.com/en-us/openvino-toolkit/choose-download?cid=&source=upgrade&content=2020_3_LTS'
+
+ return msg_fmt.format(link) if current_date >= expected_update_date else None
eps = 5e-2
else:
eps = 1e-4
- return np.allclose(value_ref, value, rtol=eps, atol=eps)
\ No newline at end of file
+ return np.allclose(value_ref, value, rtol=eps, atol=eps)
+
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.middle.passes.convert_data_type import destination_type_to_np_data_type
+from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
+
+
+class NonMaxSuppressionExtender(Extender):
+ op = 'NonMaxSuppression'
+
+ @staticmethod
+ def extend(op: Node):
+ if op.has_valid('output_type'):
+ op['output_type'] = destination_type_to_np_data_type(op.output_type)
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.middle.passes.convert_data_type import destination_type_to_np_data_type
+
+from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
+
+
+class ShapeOfExtender(Extender):
+ op = 'ShapeOf'
+
+ @staticmethod
+ def extend(op: Node):
+ op['output_type'] = destination_type_to_np_data_type(op.output_type)
limitations under the License.
"""
+from mo.middle.passes.convert_data_type import destination_type_to_np_data_type
from mo.utils.graph import Node
from mo.utils.ir_reader.extender import Extender
-class TopK_extender(Extender):
+class TopKExtender(Extender):
op = 'TopK'
@staticmethod
def extend(op: Node):
if op.graph.graph['cmd_params'].framework in ('tf', 'caffe'):
op['remove_values_output'] = True
+ if op.has_valid('index_element_type'):
+ op['index_element_type'] = destination_type_to_np_data_type(op.index_element_type)
from extensions.ops.gather import Gather
from extensions.ops.range import Range
from mo.front.common.partial_infer.utils import int64_array
-from mo.graph.graph import Node
+from mo.front.tf.graph_utils import create_op_node_with_second_input
+from mo.graph.graph import Node, Graph
+from mo.graph.port import Port
from mo.ops.concat import Concat
from mo.ops.const import Const
+from mo.ops.shape import Shape
+from mo.ops.squeeze import Squeeze
def get_canonical_axis_index_node(rank: Node, axis: int) -> Node:
new_shape_node.add_input_port(ind)
new_shape_node.in_port(ind).connect(input_node.out_port(0))
return new_shape_node
+
+
+def get_shape_and_rank_nodes_by_port(port: Port, return_as_a_scalar: bool = True):
+ """
+ The function returns nodes producing shape and rank of the data from the desired port in order to use those
+    operations in the middle/back phase
+ :param port: Port object that specifies node output port
+ :param return_as_a_scalar: boolean flag to return 1D or 0D rank
+ :return: shape and rank nodes
+ """
+ input_node_name = port.node.soft_get('name', port.node.id)
+ graph = port.node.graph
+
+ shape = Shape(graph, dict(name=input_node_name + '/ShapeOf')).create_node()
+ rank_1_d = Shape(graph, dict(name=input_node_name + '/1dRankOf')).create_node()
+ rank_1_d.in_port(0).connect(shape.out_port(0))
+ shape.in_port(0).connect(port)
+ if not return_as_a_scalar:
+ return shape, rank_1_d
+
+ rank = create_op_node_with_second_input(graph, Squeeze, int64_array([0]), {'name': input_node_name + '/0dRankOf'},
+ rank_1_d)
+ return shape, rank
-Subproject commit a909d3e0b6d12036be4d913e43b18408bd8bf0b6
+Subproject commit 2989542b5d1b656b19012edb119b5b379dbedc8a
3. Benchmark demo using public SqueezeNet topology (demo_benchmark_app.sh|bat)
-To run the demos, run demo_squeezenet_download_convert_run.sh or demo_security_barrier_camera.sh or demo_benchmark_app.sh (*.bat on Windows) scripts from the console without parameters, for example:
+4. Speech recognition demo utilizing models trained on open LibriSpeech dataset
+
+To run the demos, run demo_squeezenet_download_convert_run.sh or demo_security_barrier_camera.sh or demo_benchmark_app.sh or demo_speech_recognition.sh (*.bat on Windows) scripts from the console without parameters, for example:
./demo_squeezenet_download_convert_run.sh
The benchmark app prints performance counters, resulting latency, and throughput values.
-For more information about the Inference Engine benchmark app, refer to the documentation available in the sample folder.
\ No newline at end of file
+For more information about the Inference Engine benchmark app, refer to the documentation available in the sample folder.
+
+Speech Recognition Demo Using LibriSpeech models
+================================================
+
+The demo illustrates live speech recognition - transcribing speech from a microphone or offline (from a wave file).
+The demo is also capable of live closed captioning of an audio clip or movie, where the signal is intercepted from the speaker.
+
+The demo script does the following:
+
+ - Downloads US English models trained on LibriSpeech dataset prepared for direct usage by the Inference Engine
+ - Installs the required components
+ - Runs the command line offline demo
+ - As a final step, runs live speech recognition application with graphical interface
+
+The GUI application prints the speech transcribed from the input signal in a window. Up to two channels can be transcribed in parallel: microphone & speaker streams.
set ir_dir=%irs_path%\%model_dir%\%target_precision%
echo Download public %model_name% model
-echo python "%downloader_dir%\downloader.py" --name %model_name% --output_dir %models_path% --cache_dir %models_cache%
-python "%downloader_dir%\downloader.py" --name %model_name% --output_dir %models_path% --cache_dir %models_cache%
+echo python "%downloader_dir%\downloader.py" --name "%model_name%" --output_dir "%models_path%" --cache_dir "%models_cache%"
+python "%downloader_dir%\downloader.py" --name "%model_name%" --output_dir "%models_path%" --cache_dir "%models_cache%"
echo %model_name% model downloading completed
timeout 7
-if exist %ir_dir% (
+if exist "%ir_dir%" (
echo.
echo Target folder %ir_dir% already exists. Skipping IR generation with Model Optimizer.
echo If you want to convert a model again, remove the entire %ir_dir% folder.
echo ###############^|^| Build Inference Engine samples using MS Visual Studio (MSBuild.exe) ^|^|###############
echo.
timeout 3
-echo !MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:benchmark_app /clp:ErrorsOnly /m
+echo "!MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:benchmark_app /clp:ErrorsOnly /m
"!MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:benchmark_app /clp:ErrorsOnly /m
if ERRORLEVEL 1 GOTO errorHandling
printf "Install Model Optimizer dependencies\n\n"
cd "${INTEL_OPENVINO_DIR}/deployment_tools/model_optimizer/install_prerequisites"
. ./install_prerequisites.sh caffe
- cd $cur_path
+ cd "$cur_path"
# Step 3. Convert a model with Model Optimizer
printf "${dashes}"
set models_path=%BUILD_FOLDER%\openvino_models\ir
set models_cache=%BUILD_FOLDER%\openvino_models\cache
-if not exist %models_cache% (
- mkdir %models_cache%
+if not exist "%models_cache%" (
+ mkdir "%models_cache%"
)
set downloader_dir=%INTEL_OPENVINO_DIR%\deployment_tools\open_model_zoo\tools\downloader
set ir_dir=%irs_path%\%model_dir%\%target_precision%
echo Download public %model_name% model
-echo python "%downloader_dir%\downloader.py" --name %model_name% --output_dir %models_path% --cache_dir %models_cache%
-python "%downloader_dir%\downloader.py" --name %model_name% --output_dir %models_path% --cache_dir %models_cache%
+echo python "%downloader_dir%\downloader.py" --name "%model_name%" --output_dir "%models_path%" --cache_dir "%models_cache%"
+python "%downloader_dir%\downloader.py" --name "%model_name%" --output_dir "%models_path%" --cache_dir "%models_cache%"
echo %model_name% model downloading completed
timeout 7
-if exist %ir_dir% (
+if exist "%ir_dir%" (
echo.
echo Target folder %ir_dir% already exists. Skipping IR generation with Model Optimizer.
echo If you want to convert a model again, remove the entire %ir_dir% folder.
echo ###############^|^| Build Inference Engine samples using MS Visual Studio (MSBuild.exe) ^|^|###############
echo.
timeout 3
-echo !MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:classification_sample_async /clp:ErrorsOnly /m
+echo "!MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:classification_sample_async /clp:ErrorsOnly /m
"!MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:classification_sample_async /clp:ErrorsOnly /m
if ERRORLEVEL 1 GOTO errorHandling
printf "Install Model Optimizer dependencies\n\n"
cd "${INTEL_OPENVINO_DIR}/deployment_tools/model_optimizer/install_prerequisites"
. ./install_prerequisites.sh caffe
- cd $cur_path
+ cd "$cur_path"
# Step 3. Convert a model with Model Optimizer
printf "${dashes}"
else
export LD_LIBRARY_PATH=$HDDL_INSTALL_DIR/lib:$INSTALLDIR/deployment_tools/inference_engine/external/gna/lib:$INSTALLDIR/deployment_tools/inference_engine/external/mkltiny_lnx/lib:$INSTALLDIR/deployment_tools/inference_engine/external/tbb/lib:$IE_PLUGINS_PATH:$LD_LIBRARY_PATH
fi
+
+ export KMB_INSTALL_DIR=$INSTALLDIR/deployment_tools/inference_engine/external/hddl_unite
+ export LD_LIBRARY_PATH=$KMB_INSTALL_DIR/lib:$LD_LIBRARY_PATH
fi
if [ -e $INSTALLDIR/deployment_tools/ngraph ]; then
export LD_LIBRARY_PATH=$INSTALLDIR/deployment_tools/ngraph/lib:$LD_LIBRARY_PATH
export ngraph_DIR=$INSTALLDIR/deployment_tools/ngraph/cmake
fi
-
+
if [ -e "$INSTALLDIR/opencv" ]; then
if [ -f "$INSTALLDIR/opencv/setupvars.sh" ]; then
source "$INSTALLDIR/opencv/setupvars.sh"
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>/nfs/inn/proj/vdp/vdp_tests/stress_tests/master_04d6f112132f92cab563ae7655747e0359687dc9/</value>
+ </irs_path>
+</attributes>
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <models>
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1321668" vmpeak="1631245" vmrss="657919" vmhwm="967408" />
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1563796" vmpeak="2064987" vmrss="1227532" vmhwm="1728485" />
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1589073" vmpeak="1631151" vmrss="659287" vmhwm="966721" />
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1557202" vmpeak="1973197" vmrss="1079972" vmhwm="1580035" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1341314" vmpeak="1650890" vmrss="665329" vmhwm="974724" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1591844" vmpeak="1793074" vmrss="1255238" vmhwm="1456566" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1441388" vmpeak="1650797" vmrss="682999" vmhwm="973897" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1605884" vmpeak="1696297" vmrss="1128160" vmhwm="1303270" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903562" vmpeak="903562" vmrss="180684" vmhwm="180684" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1301939" vmpeak="1301939" vmrss="964126" vmhwm="964126" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170582" vmpeak="1255779" vmrss="189836" vmhwm="189836" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1057290" vmpeak="1142486" vmrss="582316" vmhwm="582316" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1155512" vmpeak="1257531" vmrss="406551" vmhwm="508289" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1884636" vmpeak="1884636" vmrss="1547655" vmhwm="1547655" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1241500" vmpeak="1326696" vmrss="419666" vmhwm="506740" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1583504" vmpeak="1668700" vmrss="1108941" vmhwm="1108941" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992170" vmpeak="1004790" vmrss="275704" vmhwm="288189" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1487241" vmpeak="1487241" vmrss="1150458" vmhwm="1150458" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259122" vmpeak="1259122" vmrss="283545" vmhwm="286317" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1294259" vmpeak="1379456" vmrss="819712" vmhwm="819712" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1135388" vmpeak="1188803" vmrss="366688" vmhwm="384436" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1903132" vmpeak="1903132" vmrss="1341693" vmhwm="1509783" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1221381" vmpeak="1306578" vmrss="376038" vmhwm="384514" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1517360" vmpeak="1602556" vmrss="1041424" vmhwm="1041424" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="CPU" vmsize="2658385" vmpeak="3374820" vmrss="1479264" vmhwm="2195507" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="GPU" vmsize="3398751" vmpeak="3980990" vmrss="3009406" vmhwm="3589695" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="CPU" vmsize="2763358" vmpeak="3374727" vmrss="1996228" vmhwm="2195658" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="GPU" vmsize="3381653" vmpeak="3900676" vmrss="2904111" vmhwm="3506760" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1254858" vmpeak="1436120" vmrss="461666" vmhwm="642226" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1880288" vmpeak="2024947" vmrss="1544847" vmhwm="1688965" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1529008" vmpeak="1529008" vmrss="505601" vmhwm="640972" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1560561" vmpeak="1620039" vmrss="1084423" vmhwm="1227179" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1467497" vmpeak="1765602" vmrss="637795" vmhwm="935719" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1611261" vmpeak="2008177" vmrss="1219769" vmhwm="1615723" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1771364" vmpeak="1771364" vmrss="805464" vmhwm="935511" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1605936" vmpeak="1895415" vmrss="1127750" vmhwm="1502191" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1436468" vmpeak="1623923" vmrss="753001" vmhwm="940030" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2477649" vmpeak="2606604" vmrss="1727107" vmhwm="1917645" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1704596" vmpeak="1704596" vmrss="763807" vmhwm="939510" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2069168" vmpeak="2154365" vmrss="1592208" vmhwm="1718236" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="755742" vmpeak="920202" vmrss="149593" vmhwm="149593" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="941834" vmpeak="941834" vmrss="605690" vmhwm="605690" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1012616" vmpeak="1012616" vmrss="154793" vmhwm="154793" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="827018" vmpeak="912215" vmrss="350012" vmhwm="350012" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="833872" vmpeak="886454" vmrss="162780" vmhwm="214853" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1017369" vmpeak="1055308" vmrss="681980" vmhwm="719721" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="920363" vmpeak="1005560" vmrss="167133" vmhwm="214895" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="898206" vmpeak="983403" vmrss="419707" vmhwm="455660" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="980382" vmpeak="1099368" vmrss="295952" vmhwm="414325" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1322427" vmpeak="1407354" vmrss="987646" vmhwm="1072141" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1248421" vmpeak="1248421" vmrss="307860" vmhwm="415298" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1158170" vmpeak="1243366" vmrss="680934" vmhwm="763703" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1188829" vmpeak="1392934" vmrss="513037" vmhwm="716632" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1859291" vmpeak="1997377" vmrss="1524088" vmhwm="1661504" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1456962" vmpeak="1456962" vmrss="521965" vmhwm="715650" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1605110" vmpeak="1690306" vmrss="1127874" vmhwm="1262539" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="CPU" vmsize="694122" vmpeak="774706" vmrss="35958" vmhwm="35958" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="GPU" vmsize="617312" vmpeak="617312" vmrss="281574" vmhwm="281574" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="CPU" vmsize="961006" vmpeak="1046203" vmrss="35443" vmhwm="35443" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="GPU" vmsize="610729" vmpeak="695926" vmrss="132324" vmhwm="132324" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720948" vmpeak="795828" vmrss="98992" vmhwm="98992" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="770952" vmpeak="770952" vmrss="435333" vmhwm="435333" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="987984" vmpeak="1073181" vmrss="103136" vmhwm="103136" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727896" vmpeak="813092" vmrss="252522" vmhwm="252522" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727100" vmpeak="727100" vmrss="92372" vmhwm="92372" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="858800" vmpeak="858800" vmrss="523712" vmhwm="523712" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="994151" vmpeak="1079348" vmrss="100588" vmhwm="100588" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="763750" vmpeak="848946" vmrss="288984" vmhwm="288984" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="774893" vmrss="34673" vmhwm="34673" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631940" vmpeak="631940" vmrss="288189" vmhwm="288189" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="960580" vmpeak="1045777" vmrss="35604" vmhwm="35604" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618436" vmpeak="703632" vmrss="140368" vmhwm="140368" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="866314" vmrss="43825" vmhwm="43825" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="728395" vmpeak="756038" vmrss="383780" vmhwm="410545" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="979997" vmpeak="979997" vmrss="128320" vmhwm="128320" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="763287" vmpeak="848484" vmrss="284648" vmhwm="284648" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30253" vmhwm="30253" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="520577" vmpeak="523374" vmrss="126614" vmhwm="129084" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="963367" vmpeak="1048564" vmrss="33337" vmhwm="33337" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605597" vmpeak="690794" vmrss="128091" vmhwm="129911" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="CPU" vmsize="1141790" vmpeak="1336405" vmrss="431813" vmhwm="626236" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="GPU" vmsize="1443811" vmpeak="1566063" vmrss="1055756" vmhwm="1177592" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="CPU" vmsize="1409517" vmpeak="1409517" vmrss="472004" vmhwm="625461" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="GPU" vmsize="1361157" vmpeak="1446354" vmrss="883168" vmhwm="1005030" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="CPU" vmsize="1125716" vmpeak="1312344" vmrss="413764" vmhwm="600215" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="GPU" vmsize="1426141" vmpeak="1538960" vmrss="1037488" vmhwm="1149792" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="CPU" vmsize="1212156" vmpeak="1312438" vmrss="455239" vmhwm="601276" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="GPU" vmsize="1337679" vmpeak="1365301" vmrss="859944" vmhwm="972233" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="CPU" vmsize="1299688" vmpeak="1563577" vmrss="586242" vmhwm="849924" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="GPU" vmsize="1812174" vmpeak="1997912" vmrss="1424103" vmhwm="1609166" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="CPU" vmsize="1386018" vmpeak="1563577" vmrss="626147" vmhwm="849420" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="GPU" vmsize="1652414" vmpeak="1755286" vmrss="1174087" vmhwm="1361599" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1287572" vmpeak="1580612" vmrss="624582" vmhwm="917441" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1513813" vmpeak="1998531" vmrss="1151737" vmhwm="1636216" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1464517" vmpeak="1580597" vmrss="626922" vmhwm="916905" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1498551" vmpeak="1889992" vmrss="1020489" vmhwm="1496653" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="746007" vmpeak="746007" vmrss="136240" vmhwm="136240" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="926957" vmpeak="926957" vmrss="577309" vmhwm="577309" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1013547" vmpeak="1013547" vmrss="142885" vmhwm="142885" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="813794" vmpeak="898991" vmrss="336570" vmhwm="336570" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="CPU" vmsize="824631" vmpeak="897722" vmrss="151590" vmhwm="210714" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="GPU" vmsize="838567" vmpeak="891956" vmrss="503739" vmhwm="557273" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="CPU" vmsize="910988" vmpeak="996184" vmrss="158886" vmhwm="211936" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="GPU" vmsize="818776" vmpeak="903973" vmrss="341322" vmhwm="391955" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1184934" vmpeak="1406100" vmrss="511170" vmhwm="731827" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1640386" vmpeak="1850810" vmrss="1305855" vmhwm="1515966" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1452578" vmpeak="1452578" vmrss="518258" vmhwm="732508" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1479166" vmpeak="1604392" vmrss="1000901" vmhwm="1210248" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1360918" vmpeak="1658852" vmrss="684892" vmhwm="982316" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2023595" vmpeak="2311010" vmrss="1620923" vmhwm="1906216" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628577" vmpeak="1713774" vmrss="691672" vmhwm="982930" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1814176" vmpeak="2016393" vmrss="1336238" vmhwm="1622244" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="CPU" vmsize="2119015" vmpeak="2465268" vmrss="1307748" vmhwm="1653490" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="GPU" vmsize="3063808" vmpeak="3522360" vmrss="2673543" vmhwm="3130623" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="CPU" vmsize="2386618" vmpeak="2465538" vmrss="1321663" vmhwm="1652372" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="GPU" vmsize="2799269" vmpeak="3172618" vmrss="2321664" vmhwm="2777736" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="974698" vmpeak="1100762" vmrss="304220" vmhwm="429774" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1173671" vmpeak="1286625" vmrss="838682" vmhwm="951636" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1242233" vmpeak="1242233" vmrss="310086" vmhwm="429150" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1090726" vmpeak="1175922" vmrss="613813" vmhwm="726200" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="CPU" vmsize="870022" vmpeak="924336" vmrss="179088" vmhwm="232892" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="GPU" vmsize="1125753" vmpeak="1166344" vmrss="786666" vmhwm="827138" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="CPU" vmsize="1137541" vmpeak="1137541" vmrss="184485" vmhwm="232949" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="GPU" vmsize="955177" vmpeak="1040374" vmrss="477032" vmhwm="519178" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="CPU" vmsize="1074985" vmpeak="1208168" vmrss="344406" vmhwm="477089" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="GPU" vmsize="1383397" vmpeak="1496918" vmrss="980408" vmhwm="1092702" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="CPU" vmsize="1168200" vmpeak="1253397" vmrss="374275" vmhwm="477698" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="GPU" vmsize="1240657" vmpeak="1325854" vmrss="762725" vmhwm="854386" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713351" vmpeak="787898" vmrss="52858" vmhwm="52858" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="719794" vmpeak="719794" vmrss="384508" vmhwm="384508" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="980522" vmpeak="980522" vmrss="59456" vmhwm="59456" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="686613" vmpeak="771810" vmrss="211426" vmhwm="211426" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705796" vmpeak="705796" vmrss="52405" vmhwm="52405" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724984" vmpeak="724984" vmrss="390031" vmhwm="390031" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791918" vmpeak="877115" vmrss="56269" vmhwm="56269" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674590" vmpeak="759787" vmrss="199139" vmhwm="199139" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="850278" vmpeak="901976" vmrss="168672" vmhwm="218660" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="1092353" vmpeak="1123298" vmrss="689566" vmhwm="762699" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1118015" vmpeak="1118015" vmrss="177444" vmhwm="218670" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="944564" vmpeak="1029761" vmrss="467672" vmhwm="495326" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="740069" vmpeak="740069" vmrss="128315" vmhwm="128315" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="791986" vmpeak="791986" vmrss="456830" vmhwm="456830" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="1188891" vmpeak="1274088" vmrss="138252" vmhwm="138252" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="819218" vmpeak="904415" vmrss="342066" vmhwm="342066" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="CPU" vmsize="740714" vmpeak="803946" vmrss="126521" vmhwm="126521" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="GPU" vmsize="925225" vmpeak="925225" vmrss="519417" vmhwm="586206" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="CPU" vmsize="1008446" vmpeak="1093643" vmrss="135714" vmhwm="135714" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="GPU" vmsize="824470" vmpeak="909667" vmrss="348103" vmhwm="348103" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046843" vmpeak="1178897" vmrss="308848" vmhwm="440377" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1151961" vmpeak="1168070" vmrss="815692" vmhwm="831932" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1321751" vmpeak="1321751" vmrss="373412" vmhwm="440299" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1200820" vmpeak="1286017" vmrss="725717" vmhwm="734500" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="CPU" vmsize="1186697" vmpeak="1322895" vmrss="323164" vmhwm="457116" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="GPU" vmsize="1522606" vmpeak="1522606" vmrss="1120277" vmhwm="1120277" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="CPU" vmsize="1288424" vmpeak="1373621" vmrss="500370" vmhwm="500370" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="GPU" vmsize="1449448" vmpeak="1534644" vmrss="973845" vmhwm="973845" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133809" vmpeak="2836407" vmrss="1438444" vmhwm="2140850" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2707359" vmpeak="3834188" vmrss="2314816" vmhwm="3441464" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2401339" vmpeak="3101945" vmrss="1469098" vmhwm="2139987" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2792654" vmpeak="3834136" vmrss="2314577" vmhwm="3440408" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188804" vmpeak="2918375" vmrss="1492623" vmhwm="2222001" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2898989" vmpeak="4025117" vmrss="2481081" vmhwm="3626459" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2275379" vmpeak="2918474" vmrss="1523834" vmhwm="2221715" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2876250" vmpeak="3944834" vmrss="2398682" vmhwm="3551002" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="CPU" vmsize="873480" vmpeak="943924" vmrss="196320" vmhwm="266656" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="GPU" vmsize="1067367" vmpeak="1101604" vmrss="730048" vmhwm="764051" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="CPU" vmsize="961745" vmpeak="1046942" vmrss="212149" vmhwm="266546" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="GPU" vmsize="976471" vmpeak="1061668" vmrss="499335" vmhwm="528736" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="CPU" vmsize="1428580" vmpeak="1776923" vmrss="741670" vmhwm="1089587" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="GPU" vmsize="1842729" vmpeak="2177494" vmrss="1452183" vmhwm="1785934" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="CPU" vmsize="1514890" vmpeak="1776834" vmrss="756730" vmhwm="1088464" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="GPU" vmsize="1753476" vmpeak="2003045" vmrss="1275523" vmhwm="1608807" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3478618" vmpeak="4858219" vmrss="2796794" vmhwm="4176062" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4842442" vmpeak="6987687" vmrss="4397738" vmhwm="6544928" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3567340" vmpeak="4858193" vmrss="2814666" vmhwm="4176177" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4814217" vmpeak="6932785" vmrss="4335193" vmhwm="6538194" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="998956" vmpeak="1136428" vmrss="307600" vmhwm="444735" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1052719" vmpeak="1232316" vmrss="717854" vmhwm="897540" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1258004" vmpeak="1258004" vmrss="326175" vmhwm="443996" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1059619" vmpeak="1138789" vmrss="582155" vmhwm="745664" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1249211" vmpeak="1506304" vmrss="550752" vmhwm="807762" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1492743" vmpeak="1714642" vmrss="1095354" vmhwm="1316988" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1427483" vmpeak="1512680" vmrss="582514" vmhwm="806858" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1456343" vmpeak="1595287" vmrss="978369" vmhwm="1201579" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="CPU" vmsize="871930" vmpeak="952359" vmrss="193388" vmhwm="273634" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="GPU" vmsize="878768" vmpeak="973180" vmrss="533348" vmhwm="627848" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="CPU" vmsize="959909" vmpeak="1045106" vmrss="208156" vmhwm="273530" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="GPU" vmsize="883818" vmpeak="969014" vmrss="406442" vmhwm="476595" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388405" vmpeak="1700311" vmrss="680352" vmhwm="991998" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1970503" vmpeak="2164422" vmrss="1583935" vmhwm="1777209" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1661649" vmpeak="1746846" vmrss="723148" vmhwm="991354" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1812694" vmpeak="1917910" vmrss="1335609" vmhwm="1524931" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1321320" vmpeak="1630896" vmrss="658730" vmhwm="968125" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1563660" vmpeak="2064852" vmrss="1226097" vmhwm="1727050" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1679251" vmpeak="1849645" vmrss="659406" vmhwm="966815" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1557181" vmpeak="1973176" vmrss="1079998" vmhwm="1579983" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="908549" vmpeak="908549" vmrss="180804" vmhwm="180804" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1315620" vmpeak="1315620" vmrss="978213" vmhwm="978213" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170239" vmpeak="1255436" vmrss="189326" vmhwm="189326" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1068553" vmpeak="1153750" vmrss="590298" vmhwm="590298" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1160718" vmpeak="1262736" vmrss="405376" vmhwm="507317" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1898410" vmpeak="1898410" vmrss="1560884" vmhwm="1560884" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1240917" vmpeak="1326114" vmrss="419094" vmhwm="507306" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1594502" vmpeak="1679698" vmrss="1116954" vmhwm="1116954" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="991671" vmpeak="1004291" vmrss="275397" vmhwm="287918" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1616690" vmpeak="1618188" vmrss="1278908" vmhwm="1280494" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1258623" vmpeak="1258623" vmrss="284320" vmhwm="287606" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1303156" vmpeak="1388353" vmrss="824928" vmhwm="824928" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1134889" vmpeak="1188636" vmrss="367130" vmhwm="384935" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1865047" vmpeak="1865047" vmrss="1527947" vmhwm="1527947" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1220882" vmpeak="1306078" vmrss="376006" vmhwm="384217" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1551019" vmpeak="1636216" vmrss="1071928" vmhwm="1071928" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1255898" vmpeak="1437160" vmrss="461385" vmhwm="642049" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1800479" vmpeak="1945580" vmrss="1462780" vmhwm="1607470" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1530053" vmpeak="1530053" vmrss="505570" vmhwm="641368" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1561955" vmpeak="1619753" vmrss="1084324" vmhwm="1225473" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2833797" vmpeak="3516609" vmrss="1409798" vmhwm="2092417" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="GPU" vmsize="4293634" vmpeak="4293634" vmrss="3955525" vmhwm="3955525" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="CPU" vmsize="3022032" vmpeak="3516609" vmrss="2255333" vmhwm="2255333" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="GPU" vmsize="4277993" vmpeak="4363190" vmrss="3799333" vmhwm="3799333" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="CPU" vmsize="1066384" vmpeak="1233736" vmrss="390972" vmhwm="557528" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="GPU" vmsize="1358442" vmpeak="1615062" vmrss="1020947" vmhwm="1273121" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="CPU" vmsize="1243392" vmpeak="1328589" vmrss="398580" vmhwm="558469" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="GPU" vmsize="1256070" vmpeak="1398212" vmrss="778549" vmhwm="1001192" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437560" vmpeak="1625010" vmrss="754254" vmhwm="941142" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2281713" vmpeak="2410668" vmrss="1943780" vmhwm="2072428" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524473" vmpeak="1625005" vmrss="763001" vmhwm="940264" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2070671" vmpeak="2155868" vmrss="1593108" vmhwm="1719125" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="835629" vmpeak="889226" vmrss="164216" vmhwm="217245" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="983507" vmpeak="1024665" vmrss="645985" vmhwm="686930" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="927451" vmpeak="1012648" vmrss="168360" vmhwm="216569" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="900712" vmpeak="985909" vmrss="423519" vmhwm="463533" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="CPU" vmsize="980636" vmpeak="1099706" vmrss="296680" vmhwm="415194" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="GPU" vmsize="1326213" vmpeak="1409371" vmrss="988488" vmhwm="1071366" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="CPU" vmsize="1248691" vmpeak="1248691" vmrss="306857" vmhwm="414752" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="GPU" vmsize="1163032" vmpeak="1248228" vmrss="685843" vmhwm="765507" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189531" vmpeak="1393636" vmrss="513661" vmhwm="717204" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1866176" vmpeak="2002847" vmrss="1528664" vmhwm="1664577" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1457669" vmpeak="1457669" vmrss="523811" vmhwm="715837" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1606243" vmpeak="1691440" vmrss="1129185" vmhwm="1262534" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="CPU" vmsize="1521920" vmpeak="1894167" vmrss="814210" vmhwm="1185704" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="GPU" vmsize="1961772" vmpeak="2317998" vmrss="1623268" vmhwm="1979062" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="CPU" vmsize="1789325" vmpeak="1894157" vmrss="828328" vmhwm="1185480" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="GPU" vmsize="1951877" vmpeak="2240295" vmrss="1479337" vmhwm="1843041" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="CPU" vmsize="1427384" vmpeak="1755920" vmrss="719097" vmhwm="1047295" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="GPU" vmsize="2059070" vmpeak="2371101" vmrss="1721616" vmhwm="2033194" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="CPU" vmsize="1694035" vmpeak="1779232" vmrss="732596" vmhwm="1046208" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="GPU" vmsize="1863825" vmpeak="2084664" vmrss="1386002" vmhwm="1691248" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720959" vmpeak="795839" vmrss="98898" vmhwm="98898" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="749106" vmpeak="749106" vmrss="411049" vmhwm="411049" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="806941" vmpeak="806941" vmrss="104702" vmhwm="104702" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727818" vmpeak="813014" vmrss="252787" vmhwm="252787" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727116" vmpeak="793010" vmrss="92508" vmhwm="92508" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="817554" vmpeak="817554" vmrss="479762" vmhwm="479762" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="813108" vmpeak="898305" vmrss="99481" vmhwm="99481" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="765070" vmpeak="850267" vmrss="290040" vmhwm="290040" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="694023" vmrss="34377" vmhwm="34377" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631919" vmpeak="631919" vmrss="294070" vmhwm="294070" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="779532" vmpeak="864728" vmrss="36524" vmhwm="36524" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618586" vmpeak="703783" vmrss="140582" vmhwm="140582" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="783447" vmrss="42936" vmhwm="42936" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="724302" vmpeak="724302" vmrss="386261" vmhwm="386339" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="1070524" vmpeak="1155720" vmrss="129376" vmhwm="129376" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="762933" vmpeak="848130" vmrss="284216" vmhwm="284216" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30700" vmhwm="30700" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="588270" vmpeak="610240" vmrss="250692" vmhwm="269453" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="958042" vmpeak="958042" vmrss="30908" vmhwm="30908" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605176" vmpeak="690372" vmrss="127602" vmhwm="129365" />
+ <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="CPU" vmsize="732747" vmpeak="732747" vmrss="146874" vmhwm="146874" />
+ <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="GPU" vmsize="778096" vmpeak="778096" vmrss="439654" vmhwm="439654" />
+ <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="CPU" vmsize="818864" vmpeak="904061" vmrss="148220" vmhwm="148220" />
+ <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="GPU" vmsize="781279" vmpeak="866476" vmrss="323528" vmhwm="323528" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="CPU" vmsize="739559" vmpeak="739559" vmrss="67152" vmhwm="67152" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="GPU" vmsize="769938" vmpeak="769938" vmrss="431922" vmhwm="431922" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="CPU" vmsize="1007323" vmpeak="1007323" vmrss="99127" vmhwm="99127" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="GPU" vmsize="760047" vmpeak="845244" vmrss="281866" vmhwm="281866" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1219296" vmpeak="1440462" vmrss="513271" vmhwm="733850" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1693062" vmpeak="1898192" vmrss="1355270" vmhwm="1559838" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1305881" vmpeak="1440556" vmrss="527399" vmhwm="732924" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1500881" vmpeak="1620819" vmrss="1022845" vmhwm="1226721" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1406802" vmpeak="1704736" vmrss="687445" vmhwm="984760" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2147516" vmpeak="2429642" vmrss="1810073" vmhwm="2091382" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1674363" vmpeak="1759560" vmrss="702972" vmhwm="984744" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1849614" vmpeak="2046543" vmrss="1371458" vmhwm="1652222" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1218568" vmpeak="1439734" vmrss="513505" vmhwm="734136" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1688476" vmpeak="1897693" vmrss="1350502" vmhwm="1559168" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1305106" vmpeak="1439828" vmrss="526188" vmhwm="732721" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1498400" vmpeak="1619649" vmrss="1021170" vmhwm="1226201" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1406007" vmpeak="1703941" vmrss="687798" vmhwm="985082" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2132431" vmpeak="2419976" vmrss="1795331" vmhwm="2082298" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1673562" vmpeak="1758759" vmrss="702202" vmhwm="984557" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1852832" vmpeak="2055175" vmrss="1375025" vmhwm="1661046" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="CPU" vmsize="1214486" vmpeak="1422704" vmrss="531008" vmhwm="738576" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="GPU" vmsize="1653386" vmpeak="1850721" vmrss="1316047" vmhwm="1513090" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="CPU" vmsize="1307545" vmpeak="1422720" vmrss="553290" vmhwm="739018" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="GPU" vmsize="1505826" vmpeak="1597455" vmrss="1028154" vmhwm="1203888" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="CPU" vmsize="1639840" vmpeak="2058960" vmrss="933025" vmhwm="1351495" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="GPU" vmsize="2290340" vmpeak="2674006" vmrss="1952048" vmhwm="2335455" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="CPU" vmsize="1914021" vmpeak="2149482" vmrss="959363" vmhwm="1351006" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="GPU" vmsize="2119436" vmpeak="2416320" vmrss="1662554" vmhwm="2022462" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705806" vmpeak="780353" vmrss="52806" vmhwm="52806" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="700835" vmpeak="700835" vmrss="362949" vmhwm="362949" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791934" vmpeak="791934" vmrss="56794" vmhwm="56794" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674611" vmpeak="759808" vmrss="198120" vmhwm="198120" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046858" vmpeak="1178912" vmrss="308542" vmhwm="439483" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1226248" vmpeak="1247022" vmrss="889018" vmhwm="909454" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1140729" vmpeak="1225926" vmrss="372574" vmhwm="439826" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1199894" vmpeak="1285091" vmrss="724178" vmhwm="734505" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2134158" vmpeak="2836756" vmrss="1438309" vmhwm="2140715" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2781932" vmpeak="3912818" vmrss="2443178" vmhwm="3574105" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2220634" vmpeak="2836865" vmrss="1468797" vmhwm="2139722" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2790174" vmpeak="3834277" vmrss="2311826" vmhwm="3439888" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2189153" vmpeak="2918723" vmrss="1491048" vmhwm="2220868" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2869105" vmpeak="4001228" vmrss="2531100" vmhwm="3662869" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2366254" vmpeak="2918817" vmrss="1523605" vmhwm="2221388" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877716" vmpeak="3944751" vmrss="2400091" vmhwm="3551449" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3569482" vmpeak="4949084" vmrss="2797106" vmhwm="4176364" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4819713" vmpeak="6984764" vmrss="4481042" vmhwm="6645126" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3929790" vmpeak="4858536" vmrss="2814931" vmhwm="4176198" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4816962" vmpeak="6932770" vmrss="4337715" vmhwm="6538006" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="1172662" vmpeak="1401509" vmrss="491966" vmhwm="720564" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1345822" vmpeak="1585391" vmrss="1008384" vmhwm="1247916" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1442381" vmpeak="1442381" vmrss="510697" vmhwm="720267" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1348219" vmpeak="1513917" vmrss="870485" vmhwm="1120215" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="CPU" vmsize="1106159" vmpeak="1204460" vmrss="268408" vmhwm="366470" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="GPU" vmsize="1568190" vmpeak="1568190" vmrss="1230538" vmhwm="1230538" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="CPU" vmsize="1395617" vmpeak="1395617" vmrss="399692" vmhwm="399692" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="GPU" vmsize="1513621" vmpeak="1598818" vmrss="1035897" vmhwm="1035897" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="CPU" vmsize="1108187" vmpeak="1206488" vmrss="271648" vmhwm="369590" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="GPU" vmsize="2870816" vmpeak="2870816" vmrss="1290972" vmhwm="1290972" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="CPU" vmsize="1396408" vmpeak="1396408" vmrss="396172" vmhwm="396172" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="GPU" vmsize="2778490" vmpeak="2863686" vmrss="2307058" vmhwm="2307058" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="979706" vmpeak="1098692" vmrss="295682" vmhwm="414247" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1303499" vmpeak="1390069" vmrss="965224" vmhwm="1051580" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1247750" vmpeak="1247750" vmrss="307928" vmhwm="415266" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1160265" vmpeak="1245462" vmrss="682354" vmhwm="766100" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304610" vmhwm="430336" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1170265" vmpeak="1281675" vmrss="833180" vmhwm="944299" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1253189" vmpeak="1253189" vmrss="316373" vmhwm="429618" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1091214" vmpeak="1176411" vmrss="613095" vmhwm="724110" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304772" vmhwm="430414" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="GPU" vmsize="1150806" vmpeak="1261878" vmrss="813394" vmhwm="924123" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="CPU" vmsize="1253194" vmpeak="1253194" vmrss="315463" vmhwm="428974" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="GPU" vmsize="1090070" vmpeak="1175267" vmrss="612274" vmhwm="722924" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="705577" vmpeak="780457" vmrss="53320" vmhwm="53320" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="716476" vmpeak="716476" vmrss="378487" vmhwm="378487" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="972613" vmpeak="1057810" vmrss="57033" vmhwm="57033" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="672594" vmpeak="757790" vmrss="194183" vmhwm="194183" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="CPU" vmsize="1863586" vmpeak="2298270" vmrss="1166578" vmhwm="1601236" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="GPU" vmsize="3438385" vmpeak="3992487" vmrss="3100890" vmhwm="3654268" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="CPU" vmsize="2136893" vmpeak="2298270" vmrss="1177888" vmhwm="1601350" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="GPU" vmsize="2866156" vmpeak="3332056" vmrss="2390778" vmhwm="2939315" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="CPU" vmsize="1795970" vmpeak="2230654" vmrss="1095978" vmhwm="1530557" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="GPU" vmsize="3373229" vmpeak="3883687" vmrss="3035104" vmhwm="3545068" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="CPU" vmsize="2069298" vmpeak="2230675" vmrss="1108967" vmhwm="1530178" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="GPU" vmsize="2783367" vmpeak="3206626" vmrss="2308222" vmhwm="2813283" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="CPU" vmsize="1389767" vmpeak="1653657" vmrss="587459" vmhwm="851136" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="GPU" vmsize="1997091" vmpeak="1999374" vmrss="1659538" vmhwm="1661498" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="CPU" vmsize="1660250" vmpeak="1660250" vmrss="717350" vmhwm="850948" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="GPU" vmsize="1842703" vmpeak="1927900" vmrss="1363991" vmhwm="1363991" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="CPU" vmsize="783562" vmpeak="783562" vmrss="74089" vmhwm="74089" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="GPU" vmsize="976300" vmpeak="976300" vmrss="639132" vmhwm="639132" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="CPU" vmsize="1055204" vmpeak="1140401" vmrss="135018" vmhwm="135018" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="GPU" vmsize="895616" vmpeak="980813" vmrss="418631" vmhwm="418631" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903520" vmpeak="903520" vmrss="182405" vmhwm="182405" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1300780" vmpeak="1300780" vmrss="963144" vmhwm="963144" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1261171" vmpeak="1346368" vmrss="191354" vmhwm="191354" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1066088" vmpeak="1151285" vmrss="588608" vmhwm="588608" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992097" vmpeak="1004718" vmrss="276021" vmhwm="288532" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1673510" vmpeak="1686178" vmrss="1335256" vmhwm="1346415" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259304" vmpeak="1259304" vmrss="285667" vmhwm="288584" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1318803" vmpeak="1404000" vmrss="840652" vmhwm="840652" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="CPU" vmsize="742190" vmpeak="801429" vmrss="120036" vmhwm="120036" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="GPU" vmsize="917155" vmpeak="917155" vmrss="580470" vmhwm="580470" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="CPU" vmsize="828079" vmpeak="828079" vmrss="124950" vmhwm="124950" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="GPU" vmsize="798803" vmpeak="884000" vmrss="322223" vmhwm="322223" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="CPU" vmsize="1036542" vmpeak="1123340" vmrss="332675" vmhwm="418984" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="GPU" vmsize="1419095" vmpeak="1503018" vmrss="1081142" vmhwm="1164966" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="CPU" vmsize="1122513" vmpeak="1207710" vmrss="333564" vmhwm="417877" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="GPU" vmsize="1206654" vmpeak="1291851" vmrss="729799" vmhwm="812141" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2502557" vmpeak="2710479" vmrss="803394" vmhwm="1011098" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4844647" vmpeak="4844647" vmrss="4505820" vmhwm="4505820" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="927518" vmpeak="990735" vmrss="192327" vmhwm="255424" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1410156" vmpeak="1410156" vmrss="1071818" vmhwm="1071818" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1348308" vmpeak="1587736" vmrss="555162" vmhwm="794456" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2073328" vmpeak="2139914" vmrss="1735650" vmhwm="1801794" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="CPU" vmsize="1137926" vmpeak="1282252" vmrss="347172" vmhwm="491384" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="GPU" vmsize="1528581" vmpeak="1558133" vmrss="1191273" vmhwm="1220918" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="CPU" vmsize="1064445" vmpeak="1124276" vmrss="233131" vmhwm="292728" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="GPU" vmsize="1608666" vmpeak="1608666" vmrss="1270744" vmhwm="1270744" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="CPU" vmsize="1209941" vmpeak="1295138" vmrss="396422" vmhwm="396422" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="GPU" vmsize="1593238" vmpeak="1678435" vmrss="1137583" vmhwm="1257484" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713814" vmpeak="788028" vmrss="53034" vmhwm="53034" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="701729" vmpeak="701729" vmrss="363578" vmhwm="363578" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="799869" vmpeak="885066" vmrss="59810" vmhwm="59810" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="687694" vmpeak="772891" vmrss="209248" vmhwm="209248" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="706258" vmpeak="780140" vmrss="52884" vmhwm="52884" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="705052" vmpeak="705052" vmrss="367395" vmhwm="367395" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="973367" vmpeak="1058564" vmrss="56414" vmhwm="56414" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="677320" vmpeak="762517" vmrss="198619" vmhwm="198619" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437061" vmpeak="1624516" vmrss="755024" vmhwm="942141" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2478034" vmpeak="2597150" vmrss="2139680" vmhwm="2258219" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524120" vmpeak="1624521" vmrss="762559" vmhwm="940914" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2100274" vmpeak="2185471" vmrss="1622847" vmhwm="1739566" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="748534" vmpeak="809437" vmrss="143514" vmhwm="143514" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="943758" vmpeak="943758" vmrss="606392" vmhwm="606392" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1015783" vmpeak="1015783" vmrss="147118" vmhwm="147118" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="835073" vmpeak="920270" vmrss="357146" vmhwm="357146" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="834953" vmpeak="887541" vmrss="164626" vmhwm="217001" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1034649" vmpeak="1064835" vmrss="696592" vmhwm="726694" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="921081" vmpeak="1006278" vmrss="167502" vmhwm="215597" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="911310" vmpeak="996507" vmrss="433617" vmhwm="464682" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="971453" vmpeak="1081683" vmrss="305390" vmhwm="415204" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1332598" vmpeak="1413375" vmrss="995165" vmhwm="1075859" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1148685" vmpeak="1233882" vmrss="314220" vmhwm="414882" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1167634" vmpeak="1252830" vmrss="689416" vmhwm="769002" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189630" vmpeak="1393740" vmrss="511908" vmhwm="715540" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1867418" vmpeak="2007080" vmrss="1529990" vmhwm="1668929" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1367256" vmpeak="1452453" vmrss="523946" vmhwm="715577" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1611350" vmpeak="1696546" vmrss="1133615" vmhwm="1270427" />
+ <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2715268" vmpeak="3061650" vmrss="776375" vmhwm="1122695" />
+ <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4160156" vmpeak="4971210" vmrss="3823164" vmhwm="4634151" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="CPU" vmsize="701350" vmpeak="776562" vmrss="42281" vmhwm="42281" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="GPU" vmsize="717771" vmpeak="717771" vmrss="379501" vmhwm="379501" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="CPU" vmsize="786552" vmpeak="786552" vmrss="42406" vmhwm="42406" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="GPU" vmsize="656084" vmpeak="741280" vmrss="177543" vmhwm="177543" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="CPU" vmsize="705936" vmpeak="781149" vmrss="55619" vmhwm="55619" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="GPU" vmsize="724765" vmpeak="724765" vmrss="386458" vmhwm="386458" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="CPU" vmsize="791554" vmpeak="791554" vmrss="55582" vmhwm="55582" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="GPU" vmsize="670987" vmpeak="756184" vmrss="193029" vmhwm="193029" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="720673" vmpeak="720673" vmrss="99512" vmhwm="99512" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="771253" vmpeak="771253" vmrss="433087" vmhwm="433087" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="987828" vmpeak="1073025" vmrss="104005" vmhwm="104005" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="726986" vmpeak="812182" vmrss="248450" vmhwm="248450" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="726554" vmpeak="793447" vmrss="91452" vmhwm="91452" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="857027" vmpeak="857027" vmrss="519630" vmhwm="519630" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="812619" vmpeak="897816" vmrss="100895" vmhwm="100895" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="764800" vmpeak="849997" vmrss="287019" vmhwm="287019" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="CPU" vmsize="739960" vmpeak="739960" vmrss="134924" vmhwm="134924" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="GPU" vmsize="905439" vmpeak="905439" vmrss="567876" vmhwm="567876" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="CPU" vmsize="825988" vmpeak="891722" vmrss="144684" vmhwm="144684" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="GPU" vmsize="821251" vmpeak="906448" vmrss="343085" vmhwm="343085" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="CPU" vmsize="1026407" vmpeak="1026407" vmrss="351535" vmhwm="351535" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="GPU" vmsize="1104485" vmpeak="1149496" vmrss="766740" vmhwm="811642" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="CPU" vmsize="1209280" vmpeak="1209280" vmrss="362325" vmhwm="362325" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="GPU" vmsize="1105275" vmpeak="1190472" vmrss="627822" vmhwm="671450" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="CPU" vmsize="988072" vmpeak="1114146" vmrss="304798" vmhwm="430279" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="GPU" vmsize="1171383" vmpeak="1282325" vmrss="833705" vmhwm="944476" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="CPU" vmsize="1164982" vmpeak="1250178" vmrss="319394" vmhwm="429904" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="GPU" vmsize="1090481" vmpeak="1115056" vmrss="613485" vmhwm="722176" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1185163" vmpeak="1406329" vmrss="511669" vmhwm="732674" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1646897" vmpeak="1857653" vmrss="1308538" vmhwm="1518940" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1361906" vmpeak="1447102" vmrss="515138" vmhwm="731073" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1486612" vmpeak="1612171" vmrss="1008602" vmhwm="1218973" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1361328" vmpeak="1659262" vmrss="685287" vmhwm="983091" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2053204" vmpeak="2340951" vmrss="1714788" vmhwm="2002072" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628504" vmpeak="1713701" vmrss="690892" vmhwm="983257" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1817290" vmpeak="2019841" vmrss="1338792" vmhwm="1625405" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="980148" vmpeak="1106211" vmrss="304340" vmhwm="430242" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1177410" vmpeak="1291040" vmrss="839217" vmhwm="952868" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1060997" vmpeak="1146194" vmrss="308906" vmhwm="429811" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1094189" vmpeak="1123038" vmrss="616548" vmhwm="730298" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1217086" vmpeak="1438262" vmrss="515611" vmhwm="736502" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1721532" vmpeak="1922648" vmrss="1383304" vmhwm="1584195" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1394296" vmpeak="1479493" vmrss="530197" vmhwm="735883" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1533625" vmpeak="1649492" vmrss="1055813" vmhwm="1256236" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1664005" vmpeak="1929070" vmrss="791611" vmhwm="988280" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2054062" vmpeak="2324472" vmrss="1715776" vmhwm="1985344" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1750642" vmpeak="1750642" vmrss="806811" vmhwm="988041" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1905020" vmpeak="2088814" vmrss="1426682" vmhwm="1694347" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="CPU" vmsize="994541" vmpeak="1120615" vmrss="307034" vmhwm="432806" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="GPU" vmsize="1212042" vmpeak="1312194" vmrss="874780" vmhwm="974438" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="CPU" vmsize="1081334" vmpeak="1166531" vmrss="322436" vmhwm="432702" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="GPU" vmsize="1116720" vmpeak="1132315" vmrss="638097" vmhwm="738348" />
+ <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1467762" vmpeak="1671108" vmrss="691412" vmhwm="894509" />
+ <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2625381" vmpeak="2732168" vmrss="2288915" vmhwm="2392494" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="713590" vmpeak="788138" vmrss="53216" vmhwm="53216" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724427" vmpeak="724427" vmrss="386354" vmhwm="386354" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="799604" vmpeak="799604" vmrss="59534" vmhwm="59534" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="685677" vmpeak="770874" vmrss="206845" vmhwm="206845" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="CPU" vmsize="832010" vmpeak="832010" vmrss="144367" vmhwm="144367" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="GPU" vmsize="920249" vmpeak="920249" vmrss="582769" vmhwm="582769" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="CPU" vmsize="1009200" vmpeak="1094397" vmrss="156052" vmhwm="156052" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="GPU" vmsize="851666" vmpeak="936863" vmrss="374660" vmhwm="374660" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="CPU" vmsize="1357855" vmpeak="1537842" vmrss="428038" vmhwm="602841" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="GPU" vmsize="1748255" vmpeak="1748255" vmrss="1410474" vmhwm="1410474" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="CPU" vmsize="1539933" vmpeak="1625130" vmrss="506157" vmhwm="602326" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="GPU" vmsize="1597762" vmpeak="1597762" vmrss="1125956" vmhwm="1125956" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="CPU" vmsize="1508566" vmpeak="1688554" vmrss="427086" vmhwm="602414" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="GPU" vmsize="1694071" vmpeak="1694071" vmrss="1356300" vmhwm="1356300" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="CPU" vmsize="1418346" vmpeak="1507495" vmrss="498206" vmhwm="602238" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="GPU" vmsize="1533370" vmpeak="1618567" vmrss="1062006" vmhwm="1062006" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="912147" vmpeak="990698" vmrss="224068" vmhwm="302484" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1144707" vmpeak="1222395" vmrss="807570" vmhwm="885076" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="CPU" vmsize="998842" vmpeak="1048663" vmrss="239059" vmhwm="302291" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="GPU" vmsize="1054336" vmpeak="1139533" vmrss="577106" vmhwm="651913" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="CPU" vmsize="1046905" vmpeak="1206301" vmrss="351400" vmhwm="510603" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="GPU" vmsize="1199005" vmpeak="1333363" vmrss="861400" vmhwm="995815" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="CPU" vmsize="1132003" vmpeak="1217200" vmrss="380998" vmhwm="509615" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="GPU" vmsize="1174336" vmpeak="1259533" vmrss="696300" vmhwm="857849" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133768" vmpeak="2836366" vmrss="1437966" vmhwm="2140403" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2803710" vmpeak="3934762" vmrss="2464961" vmhwm="3596054" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2400741" vmpeak="2836230" vmrss="1468438" vmhwm="2139410" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2793221" vmpeak="3855737" vmrss="2313766" vmhwm="3461135" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188924" vmpeak="2918494" vmrss="1491630" vmhwm="2221008" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2899624" vmpeak="4031731" vmrss="2561410" vmhwm="3693086" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2274792" vmpeak="2918401" vmrss="1523438" vmhwm="2221039" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877160" vmpeak="3966222" vmrss="2398546" vmhwm="3572186" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1252357" vmpeak="1511010" vmrss="552931" vmhwm="811361" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1481464" vmpeak="1701512" vmrss="1144072" vmhwm="1363939" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1340471" vmpeak="1510438" vmrss="585192" vmhwm="810186" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1465339" vmpeak="1601189" vmrss="987604" vmhwm="1207902" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="CPU" vmsize="872019" vmpeak="952447" vmrss="192904" vmhwm="272953" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="GPU" vmsize="876340" vmpeak="970054" vmrss="538460" vmhwm="632299" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="CPU" vmsize="959992" vmpeak="1045189" vmrss="207662" vmhwm="273093" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="GPU" vmsize="883292" vmpeak="968489" vmrss="405891" vmhwm="476907" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="CPU" vmsize="1248988" vmpeak="1505738" vmrss="549031" vmhwm="805745" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="GPU" vmsize="1459816" vmpeak="1681716" vmrss="1121952" vmhwm="1343638" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="CPU" vmsize="1337055" vmpeak="1506221" vmrss="582212" vmhwm="806447" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="GPU" vmsize="1456322" vmpeak="1589104" vmrss="977688" vmhwm="1194798" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388498" vmpeak="1700405" vmrss="680981" vmhwm="992706" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1904952" vmpeak="2102276" vmrss="1567898" vmhwm="1764921" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1486066" vmpeak="1705636" vmrss="724443" vmhwm="992409" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1809121" vmpeak="1916995" vmrss="1331512" vmhwm="1523137" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="CPU" vmsize="803400" vmpeak="848244" vmrss="123765" vmhwm="168360" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="GPU" vmsize="795683" vmpeak="825796" vmrss="458718" vmhwm="488498" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="CPU" vmsize="892273" vmpeak="977470" vmrss="139048" vmhwm="168292" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="GPU" vmsize="789438" vmpeak="874634" vmrss="312400" vmhwm="338832" />
+ </models>
+</attributes>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <devices>
+ <value>CPU</value>
+ <value>GPU</value>
+ </devices>
+ <models>
+ <value>caffe/FP32/alexnet/alexnet.xml</value>
+ <value>caffe/FP32/caffenet/caffenet.xml</value>
+ <value>caffe/FP32/densenet_121/densenet_121.xml</value>
+ <value>caffe/FP32/densenet_161/densenet_161.xml</value>
+ <value>caffe/FP32/densenet_169/densenet_169.xml</value>
+ <value>caffe/FP32/densenet_201/densenet_201.xml</value>
+ <value>caffe/FP32/dpn_92/dpn_92.xml</value>
+ <value>caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+ <value>caffe/FP32/inception_v1/inception_v1.xml</value>
+ <value>caffe/FP32/inception_v2/inception_v2.xml</value>
+ <value>caffe/FP32/inception_v3/inception_v3.xml</value>
+ <value>caffe/FP32/inception_v4/inception_v4.xml</value>
+ <value>caffe/FP32/lenet/lenet.xml</value>
+ <value>caffe/FP32/mobilenet/mobilenet.xml</value>
+ <value>caffe/FP32/mobilenet_v2/mobilenet_v2.xml</value>
+ <value>caffe/FP32/resnet_18/resnet_18.xml</value>
+ <value>caffe/FP32/resnet_v1_50/resnet_v1_50.xml</value>
+ <value>caffe/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+ <value>caffe/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+ <value>caffe/FP32/resnet_v1_269/resnet_v1_269.xml</value>
+ <value>caffe/FP32/se_resnext_50/se_resnext_50.xml</value>
+ <value>caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml</value>
+ <value>caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+ <value>caffe/FP32/ssd_googlenet/ssd_googlenet.xml</value>
+ <value>caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml</value>
+ <value>caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml</value>
+ <value>caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
+ <value>caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml</value>
+ <value>caffe/FP32/vgg16/vgg16.xml</value>
+ <value>caffe/FP32/vgg19/vgg19.xml</value>
+ <value>caffe/FP32/wrn_50_2/wrn_50_2.xml</value>
+ <value>caffe/FP32/yolo_v1_full/yolo_v1_full.xml</value>
+ <value>caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
+ <value>caffe/FP32/yolo_v2/yolo_v2.xml</value>
+ <value>caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml</value>
+ <value>caffe/FP32/yolo_v3/yolo_v3.xml</value>
+ <value>caffe/FP32/dilation/dilation.xml</value>
+ <value>caffe/FP32/dssd/dssd.xml</value>
+ <value>caffe/FP32/fcn8/fcn8.xml</value>
+ <value>caffe/FP32/fcn32/fcn32.xml</value>
+ <value>caffe/FP32/fcn_alexnet/fcn_alexnet.xml</value>
+ <value>caffe/FP32/mtcnn_p/mtcnn_p.xml</value>
+ <value>caffe/FP32/mtcnn_r/mtcnn_r.xml</value>
+ <value>caffe/FP32/mtcnn_o/mtcnn_o.xml</value>
+ <value>caffe/FP32/openpose_face/openpose_face.xml</value>
+ <value>caffe/FP32/openpose_hand/openpose_hand.xml</value>
+ <value>caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml</value>
+ <value>caffe/FP32/places205_alexnet/places205_alexnet.xml</value>
+ <value>caffe/FP32/places205_googlenet/places205_googlenet.xml</value>
+ <value>caffe/FP32/se_bn_inception/se_bn_inception.xml</value>
+ <value>caffe/FP32/vnect/vnect.xml</value>
+ <value>tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml</value>
+ <value>tf/1.14.0/FP32/bert_xnli/bert_xnli.xml</value>
+ <value>tf/1.14.0/FP32/cmu/cmu.xml</value>
+ <value>tf/1.14.0/FP32/densenet_121/densenet_121.xml</value>
+ <value>tf/1.14.0/FP32/densenet_169/densenet_169.xml</value>
+ <value>tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml</value>
+ <value>tf/1.14.0/FP32/east/east.xml</value>
+ <value>tf/1.14.0/FP32/facenet/facenet.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml</value>
+ <value>tf/1.14.0/FP32/gnmt/gnmt.xml</value>
+ <value>tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml</value>
+ <value>tf/1.14.0/FP32/inception_v1/inception_v1.xml</value>
+ <value>tf/1.14.0/FP32/inception_v2/inception_v2.xml</value>
+ <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
+ <value>tf/1.14.0/FP32/inception_v4/inception_v4.xml</value>
+ <value>tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+ <value>tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml</value>
+ <value>tf/1.14.0/FP32/ncf/ncf.xml</value>
+ <value>tf/1.14.0/FP32/nasnet-a_large/nasnet-a_large.xml</value>
+ <value>tf/1.14.0/FP32/nasnet-a_mobile/nasnet-a_mobile.xml</value>
+ <value>tf/1.14.0/FP32/pnasnet-5_large/pnasnet-5_large.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml</value>
+ <value>tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml</value>
+ <value>tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml</value>
+ <value>tf/1.14.0/FP32/unet2d/unet2d.xml</value>
+ <value>tf/1.14.0/FP32/vgg16/vgg16.xml</value>
+ <value>tf/1.14.0/FP32/vgg19/vgg19.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v2/yolo_v2.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v3/yolo_v3.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml</value>
+ <value>tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml</value>
+ <value>tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml</value>
+ <value>tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml</value>
+ <value>mxnet/FP32/caffenet/caffenet.xml</value>
+ <value>mxnet/FP32/densenet_121/densenet_121.xml</value>
+ <value>mxnet/FP32/densenet_161/densenet_161.xml</value>
+ <value>mxnet/FP32/densenet_169/densenet_169.xml</value>
+ <value>mxnet/FP32/densenet_201/densenet_201.xml</value>
+ <value>mxnet/FP32/inception_v3/inception_v3.xml</value>
+ <value>mxnet/FP32/inception_v4/inception_v4.xml</value>
+ <value>mxnet/FP32/mobilenet/mobilenet.xml</value>
+ <value>mxnet/FP32/mobilenet_v2/mobilenet_v2.xml</value>
+ <value>mxnet/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+ <value>mxnet/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+ <value>mxnet/FP32/resnet_v2_101/resnet_v2_101.xml</value>
+ <value>mxnet/FP32/resnet_v2_152/resnet_v2_152.xml</value>
+ <value>mxnet/FP32/resnext_101/resnext_101.xml</value>
+ <value>mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+ <value>mxnet/FP32/ssd_inception_v3_512/ssd_inception_v3_512.xml</value>
+ <value>mxnet/FP32/ssd_mobilenet_512/ssd_mobilenet_512.xml</value>
+ <value>mxnet/FP32/ssd_resnet50_512/ssd_resnet50_512.xml</value>
+ <value>mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
+ <value>mxnet/FP32/vgg16/vgg16.xml</value>
+ <value>mxnet/FP32/vgg19/vgg19.xml</value>
+ <value>mxnet/FP32/dpn_92/dpn_92.xml</value>
+ <value>mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml</value>
+ <value>mxnet/FP32/full_imagenet_network/full_imagenet_network.xml</value>
+ <value>mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+ <value>mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml</value>
+ <value>mxnet/FP32/location_net/location_net.xml</value>
+ <value>mxnet/FP32/lresnet100e/lresnet100e.xml</value>
+ <value>mxnet/FP32/mtcnn_p/mtcnn_p.xml</value>
+ <value>mxnet/FP32/mtcnn_r/mtcnn_r.xml</value>
+ <value>mxnet/FP32/mtcnn_o/mtcnn_o.xml</value>
+ <value>mxnet/FP32/nin/nin.xml</value>
+ <value>mxnet/FP32/nst_vgg19/nst_vgg19.xml</value>
+ <value>mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml</value>
+ <value>mxnet/FP32/yolo_v1_full/yolo_v1_full.xml</value>
+ <value>mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
+ <value>onnx/FP32/ssd_resnet34/ssd_resnet34.xml</value>
+ <value>onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml</value>
+ <value>onnx/FP32/retina_net/retina_net.xml</value>
+ <value>pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml</value>
+ <value>pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml</value>
+ <value>pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml</value>
+ <value>pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml</value>
+ </models>
+</attributes>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>/nfs/inn/proj/vdp/vdp_tests/stress_tests/master_04d6f112132f92cab563ae7655747e0359687dc9/</value>
+ </irs_path>
+</attributes>
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <models>
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1321668" vmpeak="1631245" vmrss="657919" vmhwm="967408" />
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1563796" vmpeak="2064987" vmrss="1227532" vmhwm="1728485" />
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1589073" vmpeak="1631151" vmrss="659287" vmhwm="966721" />
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1557202" vmpeak="1973197" vmrss="1079972" vmhwm="1580035" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1341314" vmpeak="1650890" vmrss="665329" vmhwm="974724" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1591844" vmpeak="1793074" vmrss="1255238" vmhwm="1456566" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1441388" vmpeak="1650797" vmrss="682999" vmhwm="973897" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1605884" vmpeak="1696297" vmrss="1128160" vmhwm="1303270" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903562" vmpeak="903562" vmrss="180684" vmhwm="180684" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1301939" vmpeak="1301939" vmrss="964126" vmhwm="964126" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170582" vmpeak="1255779" vmrss="189836" vmhwm="189836" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1057290" vmpeak="1142486" vmrss="582316" vmhwm="582316" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1155512" vmpeak="1257531" vmrss="406551" vmhwm="508289" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1884636" vmpeak="1884636" vmrss="1547655" vmhwm="1547655" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1241500" vmpeak="1326696" vmrss="419666" vmhwm="506740" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1583504" vmpeak="1668700" vmrss="1108941" vmhwm="1108941" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992170" vmpeak="1004790" vmrss="275704" vmhwm="288189" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1487241" vmpeak="1487241" vmrss="1150458" vmhwm="1150458" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259122" vmpeak="1259122" vmrss="283545" vmhwm="286317" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1294259" vmpeak="1379456" vmrss="819712" vmhwm="819712" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1135388" vmpeak="1188803" vmrss="366688" vmhwm="384436" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1903132" vmpeak="1903132" vmrss="1341693" vmhwm="1509783" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1221381" vmpeak="1306578" vmrss="376038" vmhwm="384514" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1517360" vmpeak="1602556" vmrss="1041424" vmhwm="1041424" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="CPU" vmsize="2658385" vmpeak="3374820" vmrss="1479264" vmhwm="2195507" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="GPU" vmsize="3398751" vmpeak="3980990" vmrss="3009406" vmhwm="3589695" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="CPU" vmsize="2763358" vmpeak="3374727" vmrss="1996228" vmhwm="2195658" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="GPU" vmsize="3381653" vmpeak="3900676" vmrss="2904111" vmhwm="3506760" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1254858" vmpeak="1436120" vmrss="461666" vmhwm="642226" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1880288" vmpeak="2024947" vmrss="1544847" vmhwm="1688965" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1529008" vmpeak="1529008" vmrss="505601" vmhwm="640972" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1560561" vmpeak="1620039" vmrss="1084423" vmhwm="1227179" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1467497" vmpeak="1765602" vmrss="637795" vmhwm="935719" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1611261" vmpeak="2008177" vmrss="1219769" vmhwm="1615723" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1771364" vmpeak="1771364" vmrss="805464" vmhwm="935511" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1605936" vmpeak="1895415" vmrss="1127750" vmhwm="1502191" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1436468" vmpeak="1623923" vmrss="753001" vmhwm="940030" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2477649" vmpeak="2606604" vmrss="1727107" vmhwm="1917645" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1704596" vmpeak="1704596" vmrss="763807" vmhwm="939510" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2069168" vmpeak="2154365" vmrss="1592208" vmhwm="1718236" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="755742" vmpeak="920202" vmrss="149593" vmhwm="149593" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="941834" vmpeak="941834" vmrss="605690" vmhwm="605690" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1012616" vmpeak="1012616" vmrss="154793" vmhwm="154793" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="827018" vmpeak="912215" vmrss="350012" vmhwm="350012" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="833872" vmpeak="886454" vmrss="162780" vmhwm="214853" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1017369" vmpeak="1055308" vmrss="681980" vmhwm="719721" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="920363" vmpeak="1005560" vmrss="167133" vmhwm="214895" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="898206" vmpeak="983403" vmrss="419707" vmhwm="455660" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="980382" vmpeak="1099368" vmrss="295952" vmhwm="414325" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1322427" vmpeak="1407354" vmrss="987646" vmhwm="1072141" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1248421" vmpeak="1248421" vmrss="307860" vmhwm="415298" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1158170" vmpeak="1243366" vmrss="680934" vmhwm="763703" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1188829" vmpeak="1392934" vmrss="513037" vmhwm="716632" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1859291" vmpeak="1997377" vmrss="1524088" vmhwm="1661504" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1456962" vmpeak="1456962" vmrss="521965" vmhwm="715650" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1605110" vmpeak="1690306" vmrss="1127874" vmhwm="1262539" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="CPU" vmsize="694122" vmpeak="774706" vmrss="35958" vmhwm="35958" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="GPU" vmsize="617312" vmpeak="617312" vmrss="281574" vmhwm="281574" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="CPU" vmsize="961006" vmpeak="1046203" vmrss="35443" vmhwm="35443" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="GPU" vmsize="610729" vmpeak="695926" vmrss="132324" vmhwm="132324" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720948" vmpeak="795828" vmrss="98992" vmhwm="98992" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="770952" vmpeak="770952" vmrss="435333" vmhwm="435333" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="987984" vmpeak="1073181" vmrss="103136" vmhwm="103136" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727896" vmpeak="813092" vmrss="252522" vmhwm="252522" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727100" vmpeak="727100" vmrss="92372" vmhwm="92372" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="858800" vmpeak="858800" vmrss="523712" vmhwm="523712" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="994151" vmpeak="1079348" vmrss="100588" vmhwm="100588" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="763750" vmpeak="848946" vmrss="288984" vmhwm="288984" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="774893" vmrss="34673" vmhwm="34673" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631940" vmpeak="631940" vmrss="288189" vmhwm="288189" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="960580" vmpeak="1045777" vmrss="35604" vmhwm="35604" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618436" vmpeak="703632" vmrss="140368" vmhwm="140368" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="866314" vmrss="43825" vmhwm="43825" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="728395" vmpeak="756038" vmrss="383780" vmhwm="410545" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="979997" vmpeak="979997" vmrss="128320" vmhwm="128320" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="763287" vmpeak="848484" vmrss="284648" vmhwm="284648" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30253" vmhwm="30253" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="520577" vmpeak="523374" vmrss="126614" vmhwm="129084" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="963367" vmpeak="1048564" vmrss="33337" vmhwm="33337" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605597" vmpeak="690794" vmrss="128091" vmhwm="129911" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="CPU" vmsize="1141790" vmpeak="1336405" vmrss="431813" vmhwm="626236" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="GPU" vmsize="1443811" vmpeak="1566063" vmrss="1055756" vmhwm="1177592" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="CPU" vmsize="1409517" vmpeak="1409517" vmrss="472004" vmhwm="625461" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="GPU" vmsize="1361157" vmpeak="1446354" vmrss="883168" vmhwm="1005030" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="CPU" vmsize="1125716" vmpeak="1312344" vmrss="413764" vmhwm="600215" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="GPU" vmsize="1426141" vmpeak="1538960" vmrss="1037488" vmhwm="1149792" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="CPU" vmsize="1212156" vmpeak="1312438" vmrss="455239" vmhwm="601276" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="GPU" vmsize="1337679" vmpeak="1365301" vmrss="859944" vmhwm="972233" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="CPU" vmsize="1299688" vmpeak="1563577" vmrss="586242" vmhwm="849924" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="GPU" vmsize="1812174" vmpeak="1997912" vmrss="1424103" vmhwm="1609166" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="CPU" vmsize="1386018" vmpeak="1563577" vmrss="626147" vmhwm="849420" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="GPU" vmsize="1652414" vmpeak="1755286" vmrss="1174087" vmhwm="1361599" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1287572" vmpeak="1580612" vmrss="624582" vmhwm="917441" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1513813" vmpeak="1998531" vmrss="1151737" vmhwm="1636216" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1464517" vmpeak="1580597" vmrss="626922" vmhwm="916905" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1498551" vmpeak="1889992" vmrss="1020489" vmhwm="1496653" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="746007" vmpeak="746007" vmrss="136240" vmhwm="136240" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="926957" vmpeak="926957" vmrss="577309" vmhwm="577309" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1013547" vmpeak="1013547" vmrss="142885" vmhwm="142885" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="813794" vmpeak="898991" vmrss="336570" vmhwm="336570" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="CPU" vmsize="824631" vmpeak="897722" vmrss="151590" vmhwm="210714" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="GPU" vmsize="838567" vmpeak="891956" vmrss="503739" vmhwm="557273" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="CPU" vmsize="910988" vmpeak="996184" vmrss="158886" vmhwm="211936" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="GPU" vmsize="818776" vmpeak="903973" vmrss="341322" vmhwm="391955" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1184934" vmpeak="1406100" vmrss="511170" vmhwm="731827" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1640386" vmpeak="1850810" vmrss="1305855" vmhwm="1515966" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1452578" vmpeak="1452578" vmrss="518258" vmhwm="732508" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1479166" vmpeak="1604392" vmrss="1000901" vmhwm="1210248" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1360918" vmpeak="1658852" vmrss="684892" vmhwm="982316" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2023595" vmpeak="2311010" vmrss="1620923" vmhwm="1906216" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628577" vmpeak="1713774" vmrss="691672" vmhwm="982930" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1814176" vmpeak="2016393" vmrss="1336238" vmhwm="1622244" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="CPU" vmsize="2119015" vmpeak="2465268" vmrss="1307748" vmhwm="1653490" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="GPU" vmsize="3063808" vmpeak="3522360" vmrss="2673543" vmhwm="3130623" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="CPU" vmsize="2386618" vmpeak="2465538" vmrss="1321663" vmhwm="1652372" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="GPU" vmsize="2799269" vmpeak="3172618" vmrss="2321664" vmhwm="2777736" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="974698" vmpeak="1100762" vmrss="304220" vmhwm="429774" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1173671" vmpeak="1286625" vmrss="838682" vmhwm="951636" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1242233" vmpeak="1242233" vmrss="310086" vmhwm="429150" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1090726" vmpeak="1175922" vmrss="613813" vmhwm="726200" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="CPU" vmsize="870022" vmpeak="924336" vmrss="179088" vmhwm="232892" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="GPU" vmsize="1125753" vmpeak="1166344" vmrss="786666" vmhwm="827138" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="CPU" vmsize="1137541" vmpeak="1137541" vmrss="184485" vmhwm="232949" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="GPU" vmsize="955177" vmpeak="1040374" vmrss="477032" vmhwm="519178" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="CPU" vmsize="1074985" vmpeak="1208168" vmrss="344406" vmhwm="477089" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="GPU" vmsize="1383397" vmpeak="1496918" vmrss="980408" vmhwm="1092702" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="CPU" vmsize="1168200" vmpeak="1253397" vmrss="374275" vmhwm="477698" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="GPU" vmsize="1240657" vmpeak="1325854" vmrss="762725" vmhwm="854386" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713351" vmpeak="787898" vmrss="52858" vmhwm="52858" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="719794" vmpeak="719794" vmrss="384508" vmhwm="384508" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="980522" vmpeak="980522" vmrss="59456" vmhwm="59456" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="686613" vmpeak="771810" vmrss="211426" vmhwm="211426" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705796" vmpeak="705796" vmrss="52405" vmhwm="52405" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724984" vmpeak="724984" vmrss="390031" vmhwm="390031" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791918" vmpeak="877115" vmrss="56269" vmhwm="56269" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674590" vmpeak="759787" vmrss="199139" vmhwm="199139" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="850278" vmpeak="901976" vmrss="168672" vmhwm="218660" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="1092353" vmpeak="1123298" vmrss="689566" vmhwm="762699" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1118015" vmpeak="1118015" vmrss="177444" vmhwm="218670" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="944564" vmpeak="1029761" vmrss="467672" vmhwm="495326" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="740069" vmpeak="740069" vmrss="128315" vmhwm="128315" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="791986" vmpeak="791986" vmrss="456830" vmhwm="456830" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="1188891" vmpeak="1274088" vmrss="138252" vmhwm="138252" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="819218" vmpeak="904415" vmrss="342066" vmhwm="342066" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="CPU" vmsize="740714" vmpeak="803946" vmrss="126521" vmhwm="126521" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="GPU" vmsize="925225" vmpeak="925225" vmrss="519417" vmhwm="586206" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="CPU" vmsize="1008446" vmpeak="1093643" vmrss="135714" vmhwm="135714" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="GPU" vmsize="824470" vmpeak="909667" vmrss="348103" vmhwm="348103" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046843" vmpeak="1178897" vmrss="308848" vmhwm="440377" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1151961" vmpeak="1168070" vmrss="815692" vmhwm="831932" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1321751" vmpeak="1321751" vmrss="373412" vmhwm="440299" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1200820" vmpeak="1286017" vmrss="725717" vmhwm="734500" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="CPU" vmsize="1186697" vmpeak="1322895" vmrss="323164" vmhwm="457116" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="GPU" vmsize="1522606" vmpeak="1522606" vmrss="1120277" vmhwm="1120277" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="CPU" vmsize="1288424" vmpeak="1373621" vmrss="500370" vmhwm="500370" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="GPU" vmsize="1449448" vmpeak="1534644" vmrss="973845" vmhwm="973845" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133809" vmpeak="2836407" vmrss="1438444" vmhwm="2140850" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2707359" vmpeak="3834188" vmrss="2314816" vmhwm="3441464" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2401339" vmpeak="3101945" vmrss="1469098" vmhwm="2139987" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2792654" vmpeak="3834136" vmrss="2314577" vmhwm="3440408" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188804" vmpeak="2918375" vmrss="1492623" vmhwm="2222001" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2898989" vmpeak="4025117" vmrss="2481081" vmhwm="3626459" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2275379" vmpeak="2918474" vmrss="1523834" vmhwm="2221715" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2876250" vmpeak="3944834" vmrss="2398682" vmhwm="3551002" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="CPU" vmsize="873480" vmpeak="943924" vmrss="196320" vmhwm="266656" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="GPU" vmsize="1067367" vmpeak="1101604" vmrss="730048" vmhwm="764051" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="CPU" vmsize="961745" vmpeak="1046942" vmrss="212149" vmhwm="266546" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="GPU" vmsize="976471" vmpeak="1061668" vmrss="499335" vmhwm="528736" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="CPU" vmsize="1428580" vmpeak="1776923" vmrss="741670" vmhwm="1089587" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="GPU" vmsize="1842729" vmpeak="2177494" vmrss="1452183" vmhwm="1785934" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="CPU" vmsize="1514890" vmpeak="1776834" vmrss="756730" vmhwm="1088464" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="GPU" vmsize="1753476" vmpeak="2003045" vmrss="1275523" vmhwm="1608807" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3478618" vmpeak="4858219" vmrss="2796794" vmhwm="4176062" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4842442" vmpeak="6987687" vmrss="4397738" vmhwm="6544928" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3567340" vmpeak="4858193" vmrss="2814666" vmhwm="4176177" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4814217" vmpeak="6932785" vmrss="4335193" vmhwm="6538194" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="998956" vmpeak="1136428" vmrss="307600" vmhwm="444735" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1052719" vmpeak="1232316" vmrss="717854" vmhwm="897540" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1258004" vmpeak="1258004" vmrss="326175" vmhwm="443996" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1059619" vmpeak="1138789" vmrss="582155" vmhwm="745664" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1249211" vmpeak="1506304" vmrss="550752" vmhwm="807762" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1492743" vmpeak="1714642" vmrss="1095354" vmhwm="1316988" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1427483" vmpeak="1512680" vmrss="582514" vmhwm="806858" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1456343" vmpeak="1595287" vmrss="978369" vmhwm="1201579" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="CPU" vmsize="871930" vmpeak="952359" vmrss="193388" vmhwm="273634" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="GPU" vmsize="878768" vmpeak="973180" vmrss="533348" vmhwm="627848" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="CPU" vmsize="959909" vmpeak="1045106" vmrss="208156" vmhwm="273530" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="GPU" vmsize="883818" vmpeak="969014" vmrss="406442" vmhwm="476595" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388405" vmpeak="1700311" vmrss="680352" vmhwm="991998" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1970503" vmpeak="2164422" vmrss="1583935" vmhwm="1777209" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1661649" vmpeak="1746846" vmrss="723148" vmhwm="991354" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1812694" vmpeak="1917910" vmrss="1335609" vmhwm="1524931" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1321320" vmpeak="1630896" vmrss="658730" vmhwm="968125" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1563660" vmpeak="2064852" vmrss="1226097" vmhwm="1727050" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1679251" vmpeak="1849645" vmrss="659406" vmhwm="966815" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1557181" vmpeak="1973176" vmrss="1079998" vmhwm="1579983" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="908549" vmpeak="908549" vmrss="180804" vmhwm="180804" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1315620" vmpeak="1315620" vmrss="978213" vmhwm="978213" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170239" vmpeak="1255436" vmrss="189326" vmhwm="189326" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1068553" vmpeak="1153750" vmrss="590298" vmhwm="590298" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1160718" vmpeak="1262736" vmrss="405376" vmhwm="507317" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1898410" vmpeak="1898410" vmrss="1560884" vmhwm="1560884" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1240917" vmpeak="1326114" vmrss="419094" vmhwm="507306" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1594502" vmpeak="1679698" vmrss="1116954" vmhwm="1116954" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="991671" vmpeak="1004291" vmrss="275397" vmhwm="287918" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1616690" vmpeak="1618188" vmrss="1278908" vmhwm="1280494" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1258623" vmpeak="1258623" vmrss="284320" vmhwm="287606" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1303156" vmpeak="1388353" vmrss="824928" vmhwm="824928" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1134889" vmpeak="1188636" vmrss="367130" vmhwm="384935" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1865047" vmpeak="1865047" vmrss="1527947" vmhwm="1527947" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1220882" vmpeak="1306078" vmrss="376006" vmhwm="384217" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1551019" vmpeak="1636216" vmrss="1071928" vmhwm="1071928" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1255898" vmpeak="1437160" vmrss="461385" vmhwm="642049" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1800479" vmpeak="1945580" vmrss="1462780" vmhwm="1607470" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1530053" vmpeak="1530053" vmrss="505570" vmhwm="641368" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1561955" vmpeak="1619753" vmrss="1084324" vmhwm="1225473" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2833797" vmpeak="3516609" vmrss="1409798" vmhwm="2092417" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="GPU" vmsize="4293634" vmpeak="4293634" vmrss="3955525" vmhwm="3955525" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="CPU" vmsize="3022032" vmpeak="3516609" vmrss="2255333" vmhwm="2255333" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="GPU" vmsize="4277993" vmpeak="4363190" vmrss="3799333" vmhwm="3799333" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="CPU" vmsize="1066384" vmpeak="1233736" vmrss="390972" vmhwm="557528" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="GPU" vmsize="1358442" vmpeak="1615062" vmrss="1020947" vmhwm="1273121" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="CPU" vmsize="1243392" vmpeak="1328589" vmrss="398580" vmhwm="558469" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="GPU" vmsize="1256070" vmpeak="1398212" vmrss="778549" vmhwm="1001192" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437560" vmpeak="1625010" vmrss="754254" vmhwm="941142" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2281713" vmpeak="2410668" vmrss="1943780" vmhwm="2072428" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524473" vmpeak="1625005" vmrss="763001" vmhwm="940264" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2070671" vmpeak="2155868" vmrss="1593108" vmhwm="1719125" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="835629" vmpeak="889226" vmrss="164216" vmhwm="217245" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="983507" vmpeak="1024665" vmrss="645985" vmhwm="686930" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="927451" vmpeak="1012648" vmrss="168360" vmhwm="216569" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="900712" vmpeak="985909" vmrss="423519" vmhwm="463533" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="CPU" vmsize="980636" vmpeak="1099706" vmrss="296680" vmhwm="415194" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="GPU" vmsize="1326213" vmpeak="1409371" vmrss="988488" vmhwm="1071366" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="CPU" vmsize="1248691" vmpeak="1248691" vmrss="306857" vmhwm="414752" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="GPU" vmsize="1163032" vmpeak="1248228" vmrss="685843" vmhwm="765507" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189531" vmpeak="1393636" vmrss="513661" vmhwm="717204" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1866176" vmpeak="2002847" vmrss="1528664" vmhwm="1664577" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1457669" vmpeak="1457669" vmrss="523811" vmhwm="715837" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1606243" vmpeak="1691440" vmrss="1129185" vmhwm="1262534" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="CPU" vmsize="1521920" vmpeak="1894167" vmrss="814210" vmhwm="1185704" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="GPU" vmsize="1961772" vmpeak="2317998" vmrss="1623268" vmhwm="1979062" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="CPU" vmsize="1789325" vmpeak="1894157" vmrss="828328" vmhwm="1185480" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="GPU" vmsize="1951877" vmpeak="2240295" vmrss="1479337" vmhwm="1843041" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="CPU" vmsize="1427384" vmpeak="1755920" vmrss="719097" vmhwm="1047295" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="GPU" vmsize="2059070" vmpeak="2371101" vmrss="1721616" vmhwm="2033194" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="CPU" vmsize="1694035" vmpeak="1779232" vmrss="732596" vmhwm="1046208" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="GPU" vmsize="1863825" vmpeak="2084664" vmrss="1386002" vmhwm="1691248" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720959" vmpeak="795839" vmrss="98898" vmhwm="98898" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="749106" vmpeak="749106" vmrss="411049" vmhwm="411049" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="806941" vmpeak="806941" vmrss="104702" vmhwm="104702" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727818" vmpeak="813014" vmrss="252787" vmhwm="252787" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727116" vmpeak="793010" vmrss="92508" vmhwm="92508" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="817554" vmpeak="817554" vmrss="479762" vmhwm="479762" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="813108" vmpeak="898305" vmrss="99481" vmhwm="99481" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="765070" vmpeak="850267" vmrss="290040" vmhwm="290040" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="694023" vmrss="34377" vmhwm="34377" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631919" vmpeak="631919" vmrss="294070" vmhwm="294070" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="779532" vmpeak="864728" vmrss="36524" vmhwm="36524" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618586" vmpeak="703783" vmrss="140582" vmhwm="140582" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="783447" vmrss="42936" vmhwm="42936" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="724302" vmpeak="724302" vmrss="386261" vmhwm="386339" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="1070524" vmpeak="1155720" vmrss="129376" vmhwm="129376" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="762933" vmpeak="848130" vmrss="284216" vmhwm="284216" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30700" vmhwm="30700" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="588270" vmpeak="610240" vmrss="250692" vmhwm="269453" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="958042" vmpeak="958042" vmrss="30908" vmhwm="30908" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605176" vmpeak="690372" vmrss="127602" vmhwm="129365" />
+ <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="CPU" vmsize="732747" vmpeak="732747" vmrss="146874" vmhwm="146874" />
+ <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="GPU" vmsize="778096" vmpeak="778096" vmrss="439654" vmhwm="439654" />
+ <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="CPU" vmsize="818864" vmpeak="904061" vmrss="148220" vmhwm="148220" />
+ <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="GPU" vmsize="781279" vmpeak="866476" vmrss="323528" vmhwm="323528" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="CPU" vmsize="739559" vmpeak="739559" vmrss="67152" vmhwm="67152" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="GPU" vmsize="769938" vmpeak="769938" vmrss="431922" vmhwm="431922" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="CPU" vmsize="1007323" vmpeak="1007323" vmrss="99127" vmhwm="99127" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="GPU" vmsize="760047" vmpeak="845244" vmrss="281866" vmhwm="281866" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1219296" vmpeak="1440462" vmrss="513271" vmhwm="733850" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1693062" vmpeak="1898192" vmrss="1355270" vmhwm="1559838" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1305881" vmpeak="1440556" vmrss="527399" vmhwm="732924" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1500881" vmpeak="1620819" vmrss="1022845" vmhwm="1226721" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1406802" vmpeak="1704736" vmrss="687445" vmhwm="984760" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2147516" vmpeak="2429642" vmrss="1810073" vmhwm="2091382" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1674363" vmpeak="1759560" vmrss="702972" vmhwm="984744" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1849614" vmpeak="2046543" vmrss="1371458" vmhwm="1652222" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1218568" vmpeak="1439734" vmrss="513505" vmhwm="734136" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1688476" vmpeak="1897693" vmrss="1350502" vmhwm="1559168" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1305106" vmpeak="1439828" vmrss="526188" vmhwm="732721" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1498400" vmpeak="1619649" vmrss="1021170" vmhwm="1226201" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1406007" vmpeak="1703941" vmrss="687798" vmhwm="985082" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2132431" vmpeak="2419976" vmrss="1795331" vmhwm="2082298" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1673562" vmpeak="1758759" vmrss="702202" vmhwm="984557" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1852832" vmpeak="2055175" vmrss="1375025" vmhwm="1661046" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="CPU" vmsize="1214486" vmpeak="1422704" vmrss="531008" vmhwm="738576" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="GPU" vmsize="1653386" vmpeak="1850721" vmrss="1316047" vmhwm="1513090" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="CPU" vmsize="1307545" vmpeak="1422720" vmrss="553290" vmhwm="739018" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="GPU" vmsize="1505826" vmpeak="1597455" vmrss="1028154" vmhwm="1203888" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="CPU" vmsize="1639840" vmpeak="2058960" vmrss="933025" vmhwm="1351495" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="GPU" vmsize="2290340" vmpeak="2674006" vmrss="1952048" vmhwm="2335455" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="CPU" vmsize="1914021" vmpeak="2149482" vmrss="959363" vmhwm="1351006" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="GPU" vmsize="2119436" vmpeak="2416320" vmrss="1662554" vmhwm="2022462" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705806" vmpeak="780353" vmrss="52806" vmhwm="52806" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="700835" vmpeak="700835" vmrss="362949" vmhwm="362949" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791934" vmpeak="791934" vmrss="56794" vmhwm="56794" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674611" vmpeak="759808" vmrss="198120" vmhwm="198120" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046858" vmpeak="1178912" vmrss="308542" vmhwm="439483" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1226248" vmpeak="1247022" vmrss="889018" vmhwm="909454" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1140729" vmpeak="1225926" vmrss="372574" vmhwm="439826" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1199894" vmpeak="1285091" vmrss="724178" vmhwm="734505" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2134158" vmpeak="2836756" vmrss="1438309" vmhwm="2140715" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2781932" vmpeak="3912818" vmrss="2443178" vmhwm="3574105" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2220634" vmpeak="2836865" vmrss="1468797" vmhwm="2139722" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2790174" vmpeak="3834277" vmrss="2311826" vmhwm="3439888" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2189153" vmpeak="2918723" vmrss="1491048" vmhwm="2220868" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2869105" vmpeak="4001228" vmrss="2531100" vmhwm="3662869" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2366254" vmpeak="2918817" vmrss="1523605" vmhwm="2221388" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877716" vmpeak="3944751" vmrss="2400091" vmhwm="3551449" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3569482" vmpeak="4949084" vmrss="2797106" vmhwm="4176364" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4819713" vmpeak="6984764" vmrss="4481042" vmhwm="6645126" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3929790" vmpeak="4858536" vmrss="2814931" vmhwm="4176198" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4816962" vmpeak="6932770" vmrss="4337715" vmhwm="6538006" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="1172662" vmpeak="1401509" vmrss="491966" vmhwm="720564" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1345822" vmpeak="1585391" vmrss="1008384" vmhwm="1247916" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1442381" vmpeak="1442381" vmrss="510697" vmhwm="720267" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1348219" vmpeak="1513917" vmrss="870485" vmhwm="1120215" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="CPU" vmsize="1106159" vmpeak="1204460" vmrss="268408" vmhwm="366470" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="GPU" vmsize="1568190" vmpeak="1568190" vmrss="1230538" vmhwm="1230538" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="CPU" vmsize="1395617" vmpeak="1395617" vmrss="399692" vmhwm="399692" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="GPU" vmsize="1513621" vmpeak="1598818" vmrss="1035897" vmhwm="1035897" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="CPU" vmsize="1108187" vmpeak="1206488" vmrss="271648" vmhwm="369590" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="GPU" vmsize="2870816" vmpeak="2870816" vmrss="1290972" vmhwm="1290972" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="CPU" vmsize="1396408" vmpeak="1396408" vmrss="396172" vmhwm="396172" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="GPU" vmsize="2778490" vmpeak="2863686" vmrss="2307058" vmhwm="2307058" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="979706" vmpeak="1098692" vmrss="295682" vmhwm="414247" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1303499" vmpeak="1390069" vmrss="965224" vmhwm="1051580" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1247750" vmpeak="1247750" vmrss="307928" vmhwm="415266" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1160265" vmpeak="1245462" vmrss="682354" vmhwm="766100" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304610" vmhwm="430336" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1170265" vmpeak="1281675" vmrss="833180" vmhwm="944299" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1253189" vmpeak="1253189" vmrss="316373" vmhwm="429618" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1091214" vmpeak="1176411" vmrss="613095" vmhwm="724110" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304772" vmhwm="430414" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="GPU" vmsize="1150806" vmpeak="1261878" vmrss="813394" vmhwm="924123" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="CPU" vmsize="1253194" vmpeak="1253194" vmrss="315463" vmhwm="428974" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="GPU" vmsize="1090070" vmpeak="1175267" vmrss="612274" vmhwm="722924" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="705577" vmpeak="780457" vmrss="53320" vmhwm="53320" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="716476" vmpeak="716476" vmrss="378487" vmhwm="378487" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="972613" vmpeak="1057810" vmrss="57033" vmhwm="57033" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="672594" vmpeak="757790" vmrss="194183" vmhwm="194183" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="CPU" vmsize="1863586" vmpeak="2298270" vmrss="1166578" vmhwm="1601236" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="GPU" vmsize="3438385" vmpeak="3992487" vmrss="3100890" vmhwm="3654268" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="CPU" vmsize="2136893" vmpeak="2298270" vmrss="1177888" vmhwm="1601350" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="GPU" vmsize="2866156" vmpeak="3332056" vmrss="2390778" vmhwm="2939315" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="CPU" vmsize="1795970" vmpeak="2230654" vmrss="1095978" vmhwm="1530557" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="GPU" vmsize="3373229" vmpeak="3883687" vmrss="3035104" vmhwm="3545068" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="CPU" vmsize="2069298" vmpeak="2230675" vmrss="1108967" vmhwm="1530178" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="GPU" vmsize="2783367" vmpeak="3206626" vmrss="2308222" vmhwm="2813283" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="CPU" vmsize="1389767" vmpeak="1653657" vmrss="587459" vmhwm="851136" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="GPU" vmsize="1997091" vmpeak="1999374" vmrss="1659538" vmhwm="1661498" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="CPU" vmsize="1660250" vmpeak="1660250" vmrss="717350" vmhwm="850948" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="GPU" vmsize="1842703" vmpeak="1927900" vmrss="1363991" vmhwm="1363991" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="CPU" vmsize="783562" vmpeak="783562" vmrss="74089" vmhwm="74089" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="GPU" vmsize="976300" vmpeak="976300" vmrss="639132" vmhwm="639132" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="CPU" vmsize="1055204" vmpeak="1140401" vmrss="135018" vmhwm="135018" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="GPU" vmsize="895616" vmpeak="980813" vmrss="418631" vmhwm="418631" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903520" vmpeak="903520" vmrss="182405" vmhwm="182405" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1300780" vmpeak="1300780" vmrss="963144" vmhwm="963144" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1261171" vmpeak="1346368" vmrss="191354" vmhwm="191354" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1066088" vmpeak="1151285" vmrss="588608" vmhwm="588608" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992097" vmpeak="1004718" vmrss="276021" vmhwm="288532" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1673510" vmpeak="1686178" vmrss="1335256" vmhwm="1346415" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259304" vmpeak="1259304" vmrss="285667" vmhwm="288584" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1318803" vmpeak="1404000" vmrss="840652" vmhwm="840652" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="CPU" vmsize="742190" vmpeak="801429" vmrss="120036" vmhwm="120036" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="GPU" vmsize="917155" vmpeak="917155" vmrss="580470" vmhwm="580470" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="CPU" vmsize="828079" vmpeak="828079" vmrss="124950" vmhwm="124950" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="GPU" vmsize="798803" vmpeak="884000" vmrss="322223" vmhwm="322223" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="CPU" vmsize="1036542" vmpeak="1123340" vmrss="332675" vmhwm="418984" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="GPU" vmsize="1419095" vmpeak="1503018" vmrss="1081142" vmhwm="1164966" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="CPU" vmsize="1122513" vmpeak="1207710" vmrss="333564" vmhwm="417877" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="GPU" vmsize="1206654" vmpeak="1291851" vmrss="729799" vmhwm="812141" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2502557" vmpeak="2710479" vmrss="803394" vmhwm="1011098" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4844647" vmpeak="4844647" vmrss="4505820" vmhwm="4505820" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="927518" vmpeak="990735" vmrss="192327" vmhwm="255424" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1410156" vmpeak="1410156" vmrss="1071818" vmhwm="1071818" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1348308" vmpeak="1587736" vmrss="555162" vmhwm="794456" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2073328" vmpeak="2139914" vmrss="1735650" vmhwm="1801794" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="CPU" vmsize="1137926" vmpeak="1282252" vmrss="347172" vmhwm="491384" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="GPU" vmsize="1528581" vmpeak="1558133" vmrss="1191273" vmhwm="1220918" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="CPU" vmsize="1064445" vmpeak="1124276" vmrss="233131" vmhwm="292728" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="GPU" vmsize="1608666" vmpeak="1608666" vmrss="1270744" vmhwm="1270744" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="CPU" vmsize="1209941" vmpeak="1295138" vmrss="396422" vmhwm="396422" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="GPU" vmsize="1593238" vmpeak="1678435" vmrss="1137583" vmhwm="1257484" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713814" vmpeak="788028" vmrss="53034" vmhwm="53034" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="701729" vmpeak="701729" vmrss="363578" vmhwm="363578" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="799869" vmpeak="885066" vmrss="59810" vmhwm="59810" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="687694" vmpeak="772891" vmrss="209248" vmhwm="209248" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="706258" vmpeak="780140" vmrss="52884" vmhwm="52884" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="705052" vmpeak="705052" vmrss="367395" vmhwm="367395" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="973367" vmpeak="1058564" vmrss="56414" vmhwm="56414" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="677320" vmpeak="762517" vmrss="198619" vmhwm="198619" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437061" vmpeak="1624516" vmrss="755024" vmhwm="942141" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2478034" vmpeak="2597150" vmrss="2139680" vmhwm="2258219" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524120" vmpeak="1624521" vmrss="762559" vmhwm="940914" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2100274" vmpeak="2185471" vmrss="1622847" vmhwm="1739566" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="748534" vmpeak="809437" vmrss="143514" vmhwm="143514" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="943758" vmpeak="943758" vmrss="606392" vmhwm="606392" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1015783" vmpeak="1015783" vmrss="147118" vmhwm="147118" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="835073" vmpeak="920270" vmrss="357146" vmhwm="357146" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="834953" vmpeak="887541" vmrss="164626" vmhwm="217001" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1034649" vmpeak="1064835" vmrss="696592" vmhwm="726694" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="921081" vmpeak="1006278" vmrss="167502" vmhwm="215597" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="911310" vmpeak="996507" vmrss="433617" vmhwm="464682" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="971453" vmpeak="1081683" vmrss="305390" vmhwm="415204" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1332598" vmpeak="1413375" vmrss="995165" vmhwm="1075859" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1148685" vmpeak="1233882" vmrss="314220" vmhwm="414882" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1167634" vmpeak="1252830" vmrss="689416" vmhwm="769002" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189630" vmpeak="1393740" vmrss="511908" vmhwm="715540" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1867418" vmpeak="2007080" vmrss="1529990" vmhwm="1668929" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1367256" vmpeak="1452453" vmrss="523946" vmhwm="715577" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1611350" vmpeak="1696546" vmrss="1133615" vmhwm="1270427" />
+ <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2715268" vmpeak="3061650" vmrss="776375" vmhwm="1122695" />
+ <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4160156" vmpeak="4971210" vmrss="3823164" vmhwm="4634151" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="CPU" vmsize="701350" vmpeak="776562" vmrss="42281" vmhwm="42281" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="GPU" vmsize="717771" vmpeak="717771" vmrss="379501" vmhwm="379501" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="CPU" vmsize="786552" vmpeak="786552" vmrss="42406" vmhwm="42406" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="GPU" vmsize="656084" vmpeak="741280" vmrss="177543" vmhwm="177543" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="CPU" vmsize="705936" vmpeak="781149" vmrss="55619" vmhwm="55619" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="GPU" vmsize="724765" vmpeak="724765" vmrss="386458" vmhwm="386458" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="CPU" vmsize="791554" vmpeak="791554" vmrss="55582" vmhwm="55582" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="GPU" vmsize="670987" vmpeak="756184" vmrss="193029" vmhwm="193029" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="720673" vmpeak="720673" vmrss="99512" vmhwm="99512" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="771253" vmpeak="771253" vmrss="433087" vmhwm="433087" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="987828" vmpeak="1073025" vmrss="104005" vmhwm="104005" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="726986" vmpeak="812182" vmrss="248450" vmhwm="248450" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="726554" vmpeak="793447" vmrss="91452" vmhwm="91452" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="857027" vmpeak="857027" vmrss="519630" vmhwm="519630" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="812619" vmpeak="897816" vmrss="100895" vmhwm="100895" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="764800" vmpeak="849997" vmrss="287019" vmhwm="287019" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="CPU" vmsize="739960" vmpeak="739960" vmrss="134924" vmhwm="134924" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="GPU" vmsize="905439" vmpeak="905439" vmrss="567876" vmhwm="567876" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="CPU" vmsize="825988" vmpeak="891722" vmrss="144684" vmhwm="144684" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="GPU" vmsize="821251" vmpeak="906448" vmrss="343085" vmhwm="343085" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="CPU" vmsize="1026407" vmpeak="1026407" vmrss="351535" vmhwm="351535" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="GPU" vmsize="1104485" vmpeak="1149496" vmrss="766740" vmhwm="811642" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="CPU" vmsize="1209280" vmpeak="1209280" vmrss="362325" vmhwm="362325" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="GPU" vmsize="1105275" vmpeak="1190472" vmrss="627822" vmhwm="671450" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="CPU" vmsize="988072" vmpeak="1114146" vmrss="304798" vmhwm="430279" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="GPU" vmsize="1171383" vmpeak="1282325" vmrss="833705" vmhwm="944476" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="CPU" vmsize="1164982" vmpeak="1250178" vmrss="319394" vmhwm="429904" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="GPU" vmsize="1090481" vmpeak="1115056" vmrss="613485" vmhwm="722176" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1185163" vmpeak="1406329" vmrss="511669" vmhwm="732674" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1646897" vmpeak="1857653" vmrss="1308538" vmhwm="1518940" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1361906" vmpeak="1447102" vmrss="515138" vmhwm="731073" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1486612" vmpeak="1612171" vmrss="1008602" vmhwm="1218973" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1361328" vmpeak="1659262" vmrss="685287" vmhwm="983091" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2053204" vmpeak="2340951" vmrss="1714788" vmhwm="2002072" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628504" vmpeak="1713701" vmrss="690892" vmhwm="983257" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1817290" vmpeak="2019841" vmrss="1338792" vmhwm="1625405" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="980148" vmpeak="1106211" vmrss="304340" vmhwm="430242" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1177410" vmpeak="1291040" vmrss="839217" vmhwm="952868" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1060997" vmpeak="1146194" vmrss="308906" vmhwm="429811" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1094189" vmpeak="1123038" vmrss="616548" vmhwm="730298" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1217086" vmpeak="1438262" vmrss="515611" vmhwm="736502" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1721532" vmpeak="1922648" vmrss="1383304" vmhwm="1584195" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1394296" vmpeak="1479493" vmrss="530197" vmhwm="735883" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1533625" vmpeak="1649492" vmrss="1055813" vmhwm="1256236" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1664005" vmpeak="1929070" vmrss="791611" vmhwm="988280" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2054062" vmpeak="2324472" vmrss="1715776" vmhwm="1985344" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1750642" vmpeak="1750642" vmrss="806811" vmhwm="988041" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1905020" vmpeak="2088814" vmrss="1426682" vmhwm="1694347" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="CPU" vmsize="994541" vmpeak="1120615" vmrss="307034" vmhwm="432806" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="GPU" vmsize="1212042" vmpeak="1312194" vmrss="874780" vmhwm="974438" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="CPU" vmsize="1081334" vmpeak="1166531" vmrss="322436" vmhwm="432702" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="GPU" vmsize="1116720" vmpeak="1132315" vmrss="638097" vmhwm="738348" />
+ <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1467762" vmpeak="1671108" vmrss="691412" vmhwm="894509" />
+ <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2625381" vmpeak="2732168" vmrss="2288915" vmhwm="2392494" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="713590" vmpeak="788138" vmrss="53216" vmhwm="53216" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724427" vmpeak="724427" vmrss="386354" vmhwm="386354" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="799604" vmpeak="799604" vmrss="59534" vmhwm="59534" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="685677" vmpeak="770874" vmrss="206845" vmhwm="206845" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="CPU" vmsize="832010" vmpeak="832010" vmrss="144367" vmhwm="144367" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="GPU" vmsize="920249" vmpeak="920249" vmrss="582769" vmhwm="582769" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="CPU" vmsize="1009200" vmpeak="1094397" vmrss="156052" vmhwm="156052" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="GPU" vmsize="851666" vmpeak="936863" vmrss="374660" vmhwm="374660" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="CPU" vmsize="1357855" vmpeak="1537842" vmrss="428038" vmhwm="602841" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="GPU" vmsize="1748255" vmpeak="1748255" vmrss="1410474" vmhwm="1410474" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="CPU" vmsize="1539933" vmpeak="1625130" vmrss="506157" vmhwm="602326" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="GPU" vmsize="1597762" vmpeak="1597762" vmrss="1125956" vmhwm="1125956" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="CPU" vmsize="1508566" vmpeak="1688554" vmrss="427086" vmhwm="602414" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="GPU" vmsize="1694071" vmpeak="1694071" vmrss="1356300" vmhwm="1356300" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="CPU" vmsize="1418346" vmpeak="1507495" vmrss="498206" vmhwm="602238" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="GPU" vmsize="1533370" vmpeak="1618567" vmrss="1062006" vmhwm="1062006" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="912147" vmpeak="990698" vmrss="224068" vmhwm="302484" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1144707" vmpeak="1222395" vmrss="807570" vmhwm="885076" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="CPU" vmsize="998842" vmpeak="1048663" vmrss="239059" vmhwm="302291" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="GPU" vmsize="1054336" vmpeak="1139533" vmrss="577106" vmhwm="651913" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="CPU" vmsize="1046905" vmpeak="1206301" vmrss="351400" vmhwm="510603" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="GPU" vmsize="1199005" vmpeak="1333363" vmrss="861400" vmhwm="995815" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="CPU" vmsize="1132003" vmpeak="1217200" vmrss="380998" vmhwm="509615" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="GPU" vmsize="1174336" vmpeak="1259533" vmrss="696300" vmhwm="857849" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133768" vmpeak="2836366" vmrss="1437966" vmhwm="2140403" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2803710" vmpeak="3934762" vmrss="2464961" vmhwm="3596054" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2400741" vmpeak="2836230" vmrss="1468438" vmhwm="2139410" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2793221" vmpeak="3855737" vmrss="2313766" vmhwm="3461135" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188924" vmpeak="2918494" vmrss="1491630" vmhwm="2221008" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2899624" vmpeak="4031731" vmrss="2561410" vmhwm="3693086" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2274792" vmpeak="2918401" vmrss="1523438" vmhwm="2221039" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877160" vmpeak="3966222" vmrss="2398546" vmhwm="3572186" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1252357" vmpeak="1511010" vmrss="552931" vmhwm="811361" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1481464" vmpeak="1701512" vmrss="1144072" vmhwm="1363939" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1340471" vmpeak="1510438" vmrss="585192" vmhwm="810186" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1465339" vmpeak="1601189" vmrss="987604" vmhwm="1207902" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="CPU" vmsize="872019" vmpeak="952447" vmrss="192904" vmhwm="272953" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="GPU" vmsize="876340" vmpeak="970054" vmrss="538460" vmhwm="632299" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="CPU" vmsize="959992" vmpeak="1045189" vmrss="207662" vmhwm="273093" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="GPU" vmsize="883292" vmpeak="968489" vmrss="405891" vmhwm="476907" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="CPU" vmsize="1248988" vmpeak="1505738" vmrss="549031" vmhwm="805745" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="GPU" vmsize="1459816" vmpeak="1681716" vmrss="1121952" vmhwm="1343638" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="CPU" vmsize="1337055" vmpeak="1506221" vmrss="582212" vmhwm="806447" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="GPU" vmsize="1456322" vmpeak="1589104" vmrss="977688" vmhwm="1194798" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388498" vmpeak="1700405" vmrss="680981" vmhwm="992706" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1904952" vmpeak="2102276" vmrss="1567898" vmhwm="1764921" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1486066" vmpeak="1705636" vmrss="724443" vmhwm="992409" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1809121" vmpeak="1916995" vmrss="1331512" vmhwm="1523137" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="CPU" vmsize="803400" vmpeak="848244" vmrss="123765" vmhwm="168360" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="GPU" vmsize="795683" vmpeak="825796" vmrss="458718" vmhwm="488498" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="CPU" vmsize="892273" vmpeak="977470" vmrss="139048" vmhwm="168292" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="GPU" vmsize="789438" vmpeak="874634" vmrss="312400" vmhwm="338832" />
+ </models>
+</attributes>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <devices>
+ <value>CPU</value>
+ <value>GPU</value>
+ </devices>
+ <models>
+ <value>caffe/FP32/alexnet/alexnet.xml</value>
+ <value>caffe/FP32/caffenet/caffenet.xml</value>
+ <value>caffe/FP32/densenet_121/densenet_121.xml</value>
+ <value>caffe/FP32/densenet_161/densenet_161.xml</value>
+ <value>caffe/FP32/densenet_169/densenet_169.xml</value>
+ <value>caffe/FP32/densenet_201/densenet_201.xml</value>
+ <value>caffe/FP32/dpn_92/dpn_92.xml</value>
+ <value>caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+ <value>caffe/FP32/inception_v1/inception_v1.xml</value>
+ <value>caffe/FP32/inception_v2/inception_v2.xml</value>
+ <value>caffe/FP32/inception_v3/inception_v3.xml</value>
+ <value>caffe/FP32/inception_v4/inception_v4.xml</value>
+ <value>caffe/FP32/lenet/lenet.xml</value>
+ <value>caffe/FP32/mobilenet/mobilenet.xml</value>
+ <value>caffe/FP32/mobilenet_v2/mobilenet_v2.xml</value>
+ <value>caffe/FP32/resnet_18/resnet_18.xml</value>
+ <value>caffe/FP32/resnet_v1_50/resnet_v1_50.xml</value>
+ <value>caffe/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+ <value>caffe/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+ <value>caffe/FP32/resnet_v1_269/resnet_v1_269.xml</value>
+ <value>caffe/FP32/se_resnext_50/se_resnext_50.xml</value>
+ <value>caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml</value>
+ <value>caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+ <value>caffe/FP32/ssd_googlenet/ssd_googlenet.xml</value>
+ <value>caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml</value>
+ <value>caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml</value>
+ <value>caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
+ <value>caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml</value>
+ <value>caffe/FP32/vgg16/vgg16.xml</value>
+ <value>caffe/FP32/vgg19/vgg19.xml</value>
+ <value>caffe/FP32/wrn_50_2/wrn_50_2.xml</value>
+ <value>caffe/FP32/yolo_v1_full/yolo_v1_full.xml</value>
+ <value>caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
+ <value>caffe/FP32/yolo_v2/yolo_v2.xml</value>
+ <value>caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml</value>
+ <value>caffe/FP32/yolo_v3/yolo_v3.xml</value>
+ <value>caffe/FP32/dilation/dilation.xml</value>
+ <value>caffe/FP32/dssd/dssd.xml</value>
+ <value>caffe/FP32/fcn8/fcn8.xml</value>
+ <value>caffe/FP32/fcn32/fcn32.xml</value>
+ <value>caffe/FP32/fcn_alexnet/fcn_alexnet.xml</value>
+ <value>caffe/FP32/mtcnn_p/mtcnn_p.xml</value>
+ <value>caffe/FP32/mtcnn_r/mtcnn_r.xml</value>
+ <value>caffe/FP32/mtcnn_o/mtcnn_o.xml</value>
+ <value>caffe/FP32/openpose_face/openpose_face.xml</value>
+ <value>caffe/FP32/openpose_hand/openpose_hand.xml</value>
+ <value>caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml</value>
+ <value>caffe/FP32/places205_alexnet/places205_alexnet.xml</value>
+ <value>caffe/FP32/places205_googlenet/places205_googlenet.xml</value>
+ <value>caffe/FP32/se_bn_inception/se_bn_inception.xml</value>
+ <value>caffe/FP32/vnect/vnect.xml</value>
+ <value>tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml</value>
+ <value>tf/1.14.0/FP32/bert_xnli/bert_xnli.xml</value>
+ <value>tf/1.14.0/FP32/cmu/cmu.xml</value>
+ <value>tf/1.14.0/FP32/densenet_121/densenet_121.xml</value>
+ <value>tf/1.14.0/FP32/densenet_169/densenet_169.xml</value>
+ <value>tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml</value>
+ <value>tf/1.14.0/FP32/east/east.xml</value>
+ <value>tf/1.14.0/FP32/facenet/facenet.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml</value>
+ <value>tf/1.14.0/FP32/gnmt/gnmt.xml</value>
+ <value>tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml</value>
+ <value>tf/1.14.0/FP32/inception_v1/inception_v1.xml</value>
+ <value>tf/1.14.0/FP32/inception_v2/inception_v2.xml</value>
+ <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
+ <value>tf/1.14.0/FP32/inception_v4/inception_v4.xml</value>
+ <value>tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+ <value>tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml</value>
+ <value>tf/1.14.0/FP32/ncf/ncf.xml</value>
+ <value>tf/1.14.0/FP32/nasnet-a_large/nasnet-a_large.xml</value>
+ <value>tf/1.14.0/FP32/nasnet-a_mobile/nasnet-a_mobile.xml</value>
+ <value>tf/1.14.0/FP32/pnasnet-5_large/pnasnet-5_large.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml</value>
+ <value>tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml</value>
+ <value>tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml</value>
+ <value>tf/1.14.0/FP32/unet2d/unet2d.xml</value>
+ <value>tf/1.14.0/FP32/vgg16/vgg16.xml</value>
+ <value>tf/1.14.0/FP32/vgg19/vgg19.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v2/yolo_v2.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v3/yolo_v3.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml</value>
+ <value>tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml</value>
+ <value>tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml</value>
+ <value>tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml</value>
+ <value>mxnet/FP32/caffenet/caffenet.xml</value>
+ <value>mxnet/FP32/densenet_121/densenet_121.xml</value>
+ <value>mxnet/FP32/densenet_161/densenet_161.xml</value>
+ <value>mxnet/FP32/densenet_169/densenet_169.xml</value>
+ <value>mxnet/FP32/densenet_201/densenet_201.xml</value>
+ <value>mxnet/FP32/inception_v3/inception_v3.xml</value>
+ <value>mxnet/FP32/inception_v4/inception_v4.xml</value>
+ <value>mxnet/FP32/mobilenet/mobilenet.xml</value>
+ <value>mxnet/FP32/mobilenet_v2/mobilenet_v2.xml</value>
+ <value>mxnet/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+ <value>mxnet/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+ <value>mxnet/FP32/resnet_v2_101/resnet_v2_101.xml</value>
+ <value>mxnet/FP32/resnet_v2_152/resnet_v2_152.xml</value>
+ <value>mxnet/FP32/resnext_101/resnext_101.xml</value>
+ <value>mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+ <value>mxnet/FP32/ssd_inception_v3_512/ssd_inception_v3_512.xml</value>
+ <value>mxnet/FP32/ssd_mobilenet_512/ssd_mobilenet_512.xml</value>
+ <value>mxnet/FP32/ssd_resnet50_512/ssd_resnet50_512.xml</value>
+ <value>mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
+ <value>mxnet/FP32/vgg16/vgg16.xml</value>
+ <value>mxnet/FP32/vgg19/vgg19.xml</value>
+ <value>mxnet/FP32/dpn_92/dpn_92.xml</value>
+ <value>mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml</value>
+ <value>mxnet/FP32/full_imagenet_network/full_imagenet_network.xml</value>
+ <value>mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+ <value>mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml</value>
+ <value>mxnet/FP32/location_net/location_net.xml</value>
+ <value>mxnet/FP32/lresnet100e/lresnet100e.xml</value>
+ <value>mxnet/FP32/mtcnn_p/mtcnn_p.xml</value>
+ <value>mxnet/FP32/mtcnn_r/mtcnn_r.xml</value>
+ <value>mxnet/FP32/mtcnn_o/mtcnn_o.xml</value>
+ <value>mxnet/FP32/nin/nin.xml</value>
+ <value>mxnet/FP32/nst_vgg19/nst_vgg19.xml</value>
+ <value>mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml</value>
+ <value>mxnet/FP32/yolo_v1_full/yolo_v1_full.xml</value>
+ <value>mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
+ <value>onnx/FP32/ssd_resnet34/ssd_resnet34.xml</value>
+ <value>onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml</value>
+ <value>onnx/FP32/retina_net/retina_net.xml</value>
+ <value>pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml</value>
+ <value>pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml</value>
+ <value>pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml</value>
+ <value>pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml</value>
+ </models>
+</attributes>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>${STRESS_IRS_PATH}</value>
+ </irs_path>
+</attributes>
<?xml version="1.0"?>
<attributes>
<models>
- <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1321668" vmpeak="1631245" vmrss="657919" vmhwm="967408" />
- <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1563796" vmpeak="2064987" vmrss="1227532" vmhwm="1728485" />
- <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1589073" vmpeak="1631151" vmrss="659287" vmhwm="966721" />
- <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1557202" vmpeak="1973197" vmrss="1079972" vmhwm="1580035" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1341314" vmpeak="1650890" vmrss="665329" vmhwm="974724" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1591844" vmpeak="1793074" vmrss="1255238" vmhwm="1456566" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1441388" vmpeak="1650797" vmrss="682999" vmhwm="973897" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1605884" vmpeak="1696297" vmrss="1128160" vmhwm="1303270" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903562" vmpeak="903562" vmrss="180684" vmhwm="180684" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1301939" vmpeak="1301939" vmrss="964126" vmhwm="964126" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170582" vmpeak="1255779" vmrss="189836" vmhwm="189836" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1057290" vmpeak="1142486" vmrss="582316" vmhwm="582316" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1155512" vmpeak="1257531" vmrss="406551" vmhwm="508289" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1884636" vmpeak="1884636" vmrss="1547655" vmhwm="1547655" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1241500" vmpeak="1326696" vmrss="419666" vmhwm="506740" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1583504" vmpeak="1668700" vmrss="1108941" vmhwm="1108941" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992170" vmpeak="1004790" vmrss="275704" vmhwm="288189" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1487241" vmpeak="1487241" vmrss="1150458" vmhwm="1150458" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259122" vmpeak="1259122" vmrss="283545" vmhwm="286317" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1294259" vmpeak="1379456" vmrss="819712" vmhwm="819712" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1135388" vmpeak="1188803" vmrss="366688" vmhwm="384436" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1903132" vmpeak="1903132" vmrss="1341693" vmhwm="1509783" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1221381" vmpeak="1306578" vmrss="376038" vmhwm="384514" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1517360" vmpeak="1602556" vmrss="1041424" vmhwm="1041424" />
- <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="CPU" vmsize="2658385" vmpeak="3374820" vmrss="1479264" vmhwm="2195507" />
- <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="GPU" vmsize="3398751" vmpeak="3980990" vmrss="3009406" vmhwm="3589695" />
- <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="CPU" vmsize="2763358" vmpeak="3374727" vmrss="1996228" vmhwm="2195658" />
- <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="GPU" vmsize="3381653" vmpeak="3900676" vmrss="2904111" vmhwm="3506760" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1254858" vmpeak="1436120" vmrss="461666" vmhwm="642226" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1880288" vmpeak="2024947" vmrss="1544847" vmhwm="1688965" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1529008" vmpeak="1529008" vmrss="505601" vmhwm="640972" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1560561" vmpeak="1620039" vmrss="1084423" vmhwm="1227179" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1467497" vmpeak="1765602" vmrss="637795" vmhwm="935719" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1611261" vmpeak="2008177" vmrss="1219769" vmhwm="1615723" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1771364" vmpeak="1771364" vmrss="805464" vmhwm="935511" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1605936" vmpeak="1895415" vmrss="1127750" vmhwm="1502191" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1436468" vmpeak="1623923" vmrss="753001" vmhwm="940030" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2477649" vmpeak="2606604" vmrss="1727107" vmhwm="1917645" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1704596" vmpeak="1704596" vmrss="763807" vmhwm="939510" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2069168" vmpeak="2154365" vmrss="1592208" vmhwm="1718236" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="755742" vmpeak="920202" vmrss="149593" vmhwm="149593" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="941834" vmpeak="941834" vmrss="605690" vmhwm="605690" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1012616" vmpeak="1012616" vmrss="154793" vmhwm="154793" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="827018" vmpeak="912215" vmrss="350012" vmhwm="350012" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="833872" vmpeak="886454" vmrss="162780" vmhwm="214853" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1017369" vmpeak="1055308" vmrss="681980" vmhwm="719721" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="920363" vmpeak="1005560" vmrss="167133" vmhwm="214895" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="898206" vmpeak="983403" vmrss="419707" vmhwm="455660" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="980382" vmpeak="1099368" vmrss="295952" vmhwm="414325" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1322427" vmpeak="1407354" vmrss="987646" vmhwm="1072141" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1248421" vmpeak="1248421" vmrss="307860" vmhwm="415298" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1158170" vmpeak="1243366" vmrss="680934" vmhwm="763703" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1188829" vmpeak="1392934" vmrss="513037" vmhwm="716632" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1859291" vmpeak="1997377" vmrss="1524088" vmhwm="1661504" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1456962" vmpeak="1456962" vmrss="521965" vmhwm="715650" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1605110" vmpeak="1690306" vmrss="1127874" vmhwm="1262539" />
- <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="CPU" vmsize="694122" vmpeak="774706" vmrss="35958" vmhwm="35958" />
- <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="GPU" vmsize="617312" vmpeak="617312" vmrss="281574" vmhwm="281574" />
- <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="CPU" vmsize="961006" vmpeak="1046203" vmrss="35443" vmhwm="35443" />
- <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="GPU" vmsize="610729" vmpeak="695926" vmrss="132324" vmhwm="132324" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720948" vmpeak="795828" vmrss="98992" vmhwm="98992" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="770952" vmpeak="770952" vmrss="435333" vmhwm="435333" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="987984" vmpeak="1073181" vmrss="103136" vmhwm="103136" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727896" vmpeak="813092" vmrss="252522" vmhwm="252522" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727100" vmpeak="727100" vmrss="92372" vmhwm="92372" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="858800" vmpeak="858800" vmrss="523712" vmhwm="523712" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="994151" vmpeak="1079348" vmrss="100588" vmhwm="100588" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="763750" vmpeak="848946" vmrss="288984" vmhwm="288984" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="774893" vmrss="34673" vmhwm="34673" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631940" vmpeak="631940" vmrss="288189" vmhwm="288189" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="960580" vmpeak="1045777" vmrss="35604" vmhwm="35604" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618436" vmpeak="703632" vmrss="140368" vmhwm="140368" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="866314" vmrss="43825" vmhwm="43825" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="728395" vmpeak="756038" vmrss="383780" vmhwm="410545" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="979997" vmpeak="979997" vmrss="128320" vmhwm="128320" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="763287" vmpeak="848484" vmrss="284648" vmhwm="284648" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30253" vmhwm="30253" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="520577" vmpeak="523374" vmrss="126614" vmhwm="129084" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="963367" vmpeak="1048564" vmrss="33337" vmhwm="33337" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605597" vmpeak="690794" vmrss="128091" vmhwm="129911" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="CPU" vmsize="1141790" vmpeak="1336405" vmrss="431813" vmhwm="626236" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="GPU" vmsize="1443811" vmpeak="1566063" vmrss="1055756" vmhwm="1177592" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="CPU" vmsize="1409517" vmpeak="1409517" vmrss="472004" vmhwm="625461" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="GPU" vmsize="1361157" vmpeak="1446354" vmrss="883168" vmhwm="1005030" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="CPU" vmsize="1125716" vmpeak="1312344" vmrss="413764" vmhwm="600215" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="GPU" vmsize="1426141" vmpeak="1538960" vmrss="1037488" vmhwm="1149792" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="CPU" vmsize="1212156" vmpeak="1312438" vmrss="455239" vmhwm="601276" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="GPU" vmsize="1337679" vmpeak="1365301" vmrss="859944" vmhwm="972233" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="CPU" vmsize="1299688" vmpeak="1563577" vmrss="586242" vmhwm="849924" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="GPU" vmsize="1812174" vmpeak="1997912" vmrss="1424103" vmhwm="1609166" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="CPU" vmsize="1386018" vmpeak="1563577" vmrss="626147" vmhwm="849420" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="GPU" vmsize="1652414" vmpeak="1755286" vmrss="1174087" vmhwm="1361599" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1287572" vmpeak="1580612" vmrss="624582" vmhwm="917441" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1513813" vmpeak="1998531" vmrss="1151737" vmhwm="1636216" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1464517" vmpeak="1580597" vmrss="626922" vmhwm="916905" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1498551" vmpeak="1889992" vmrss="1020489" vmhwm="1496653" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="746007" vmpeak="746007" vmrss="136240" vmhwm="136240" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="926957" vmpeak="926957" vmrss="577309" vmhwm="577309" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1013547" vmpeak="1013547" vmrss="142885" vmhwm="142885" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="813794" vmpeak="898991" vmrss="336570" vmhwm="336570" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="CPU" vmsize="824631" vmpeak="897722" vmrss="151590" vmhwm="210714" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="GPU" vmsize="838567" vmpeak="891956" vmrss="503739" vmhwm="557273" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="CPU" vmsize="910988" vmpeak="996184" vmrss="158886" vmhwm="211936" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="GPU" vmsize="818776" vmpeak="903973" vmrss="341322" vmhwm="391955" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1184934" vmpeak="1406100" vmrss="511170" vmhwm="731827" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1640386" vmpeak="1850810" vmrss="1305855" vmhwm="1515966" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1452578" vmpeak="1452578" vmrss="518258" vmhwm="732508" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1479166" vmpeak="1604392" vmrss="1000901" vmhwm="1210248" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1360918" vmpeak="1658852" vmrss="684892" vmhwm="982316" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2023595" vmpeak="2311010" vmrss="1620923" vmhwm="1906216" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628577" vmpeak="1713774" vmrss="691672" vmhwm="982930" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1814176" vmpeak="2016393" vmrss="1336238" vmhwm="1622244" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="CPU" vmsize="2119015" vmpeak="2465268" vmrss="1307748" vmhwm="1653490" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="GPU" vmsize="3063808" vmpeak="3522360" vmrss="2673543" vmhwm="3130623" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="CPU" vmsize="2386618" vmpeak="2465538" vmrss="1321663" vmhwm="1652372" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="GPU" vmsize="2799269" vmpeak="3172618" vmrss="2321664" vmhwm="2777736" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="974698" vmpeak="1100762" vmrss="304220" vmhwm="429774" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1173671" vmpeak="1286625" vmrss="838682" vmhwm="951636" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1242233" vmpeak="1242233" vmrss="310086" vmhwm="429150" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1090726" vmpeak="1175922" vmrss="613813" vmhwm="726200" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="CPU" vmsize="870022" vmpeak="924336" vmrss="179088" vmhwm="232892" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="GPU" vmsize="1125753" vmpeak="1166344" vmrss="786666" vmhwm="827138" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="CPU" vmsize="1137541" vmpeak="1137541" vmrss="184485" vmhwm="232949" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="GPU" vmsize="955177" vmpeak="1040374" vmrss="477032" vmhwm="519178" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="CPU" vmsize="1074985" vmpeak="1208168" vmrss="344406" vmhwm="477089" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="GPU" vmsize="1383397" vmpeak="1496918" vmrss="980408" vmhwm="1092702" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="CPU" vmsize="1168200" vmpeak="1253397" vmrss="374275" vmhwm="477698" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="GPU" vmsize="1240657" vmpeak="1325854" vmrss="762725" vmhwm="854386" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713351" vmpeak="787898" vmrss="52858" vmhwm="52858" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="719794" vmpeak="719794" vmrss="384508" vmhwm="384508" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="980522" vmpeak="980522" vmrss="59456" vmhwm="59456" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="686613" vmpeak="771810" vmrss="211426" vmhwm="211426" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705796" vmpeak="705796" vmrss="52405" vmhwm="52405" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724984" vmpeak="724984" vmrss="390031" vmhwm="390031" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791918" vmpeak="877115" vmrss="56269" vmhwm="56269" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674590" vmpeak="759787" vmrss="199139" vmhwm="199139" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="850278" vmpeak="901976" vmrss="168672" vmhwm="218660" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="1092353" vmpeak="1123298" vmrss="689566" vmhwm="762699" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1118015" vmpeak="1118015" vmrss="177444" vmhwm="218670" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="944564" vmpeak="1029761" vmrss="467672" vmhwm="495326" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="740069" vmpeak="740069" vmrss="128315" vmhwm="128315" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="791986" vmpeak="791986" vmrss="456830" vmhwm="456830" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="1188891" vmpeak="1274088" vmrss="138252" vmhwm="138252" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="819218" vmpeak="904415" vmrss="342066" vmhwm="342066" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="CPU" vmsize="740714" vmpeak="803946" vmrss="126521" vmhwm="126521" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="GPU" vmsize="925225" vmpeak="925225" vmrss="519417" vmhwm="586206" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="CPU" vmsize="1008446" vmpeak="1093643" vmrss="135714" vmhwm="135714" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="GPU" vmsize="824470" vmpeak="909667" vmrss="348103" vmhwm="348103" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046843" vmpeak="1178897" vmrss="308848" vmhwm="440377" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1151961" vmpeak="1168070" vmrss="815692" vmhwm="831932" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1321751" vmpeak="1321751" vmrss="373412" vmhwm="440299" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1200820" vmpeak="1286017" vmrss="725717" vmhwm="734500" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="CPU" vmsize="1186697" vmpeak="1322895" vmrss="323164" vmhwm="457116" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="GPU" vmsize="1522606" vmpeak="1522606" vmrss="1120277" vmhwm="1120277" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="CPU" vmsize="1288424" vmpeak="1373621" vmrss="500370" vmhwm="500370" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="GPU" vmsize="1449448" vmpeak="1534644" vmrss="973845" vmhwm="973845" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133809" vmpeak="2836407" vmrss="1438444" vmhwm="2140850" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2707359" vmpeak="3834188" vmrss="2314816" vmhwm="3441464" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2401339" vmpeak="3101945" vmrss="1469098" vmhwm="2139987" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2792654" vmpeak="3834136" vmrss="2314577" vmhwm="3440408" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188804" vmpeak="2918375" vmrss="1492623" vmhwm="2222001" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2898989" vmpeak="4025117" vmrss="2481081" vmhwm="3626459" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2275379" vmpeak="2918474" vmrss="1523834" vmhwm="2221715" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2876250" vmpeak="3944834" vmrss="2398682" vmhwm="3551002" />
- <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="CPU" vmsize="873480" vmpeak="943924" vmrss="196320" vmhwm="266656" />
- <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="GPU" vmsize="1067367" vmpeak="1101604" vmrss="730048" vmhwm="764051" />
- <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="CPU" vmsize="961745" vmpeak="1046942" vmrss="212149" vmhwm="266546" />
- <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="GPU" vmsize="976471" vmpeak="1061668" vmrss="499335" vmhwm="528736" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="CPU" vmsize="1428580" vmpeak="1776923" vmrss="741670" vmhwm="1089587" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="GPU" vmsize="1842729" vmpeak="2177494" vmrss="1452183" vmhwm="1785934" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="CPU" vmsize="1514890" vmpeak="1776834" vmrss="756730" vmhwm="1088464" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="GPU" vmsize="1753476" vmpeak="2003045" vmrss="1275523" vmhwm="1608807" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3478618" vmpeak="4858219" vmrss="2796794" vmhwm="4176062" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4842442" vmpeak="6987687" vmrss="4397738" vmhwm="6544928" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3567340" vmpeak="4858193" vmrss="2814666" vmhwm="4176177" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4814217" vmpeak="6932785" vmrss="4335193" vmhwm="6538194" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="998956" vmpeak="1136428" vmrss="307600" vmhwm="444735" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1052719" vmpeak="1232316" vmrss="717854" vmhwm="897540" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1258004" vmpeak="1258004" vmrss="326175" vmhwm="443996" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1059619" vmpeak="1138789" vmrss="582155" vmhwm="745664" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1249211" vmpeak="1506304" vmrss="550752" vmhwm="807762" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1492743" vmpeak="1714642" vmrss="1095354" vmhwm="1316988" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1427483" vmpeak="1512680" vmrss="582514" vmhwm="806858" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1456343" vmpeak="1595287" vmrss="978369" vmhwm="1201579" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="CPU" vmsize="871930" vmpeak="952359" vmrss="193388" vmhwm="273634" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="GPU" vmsize="878768" vmpeak="973180" vmrss="533348" vmhwm="627848" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="CPU" vmsize="959909" vmpeak="1045106" vmrss="208156" vmhwm="273530" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="GPU" vmsize="883818" vmpeak="969014" vmrss="406442" vmhwm="476595" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388405" vmpeak="1700311" vmrss="680352" vmhwm="991998" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1970503" vmpeak="2164422" vmrss="1583935" vmhwm="1777209" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1661649" vmpeak="1746846" vmrss="723148" vmhwm="991354" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1812694" vmpeak="1917910" vmrss="1335609" vmhwm="1524931" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1321320" vmpeak="1630896" vmrss="658730" vmhwm="968125" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1563660" vmpeak="2064852" vmrss="1226097" vmhwm="1727050" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1679251" vmpeak="1849645" vmrss="659406" vmhwm="966815" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1557181" vmpeak="1973176" vmrss="1079998" vmhwm="1579983" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="908549" vmpeak="908549" vmrss="180804" vmhwm="180804" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1315620" vmpeak="1315620" vmrss="978213" vmhwm="978213" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170239" vmpeak="1255436" vmrss="189326" vmhwm="189326" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1068553" vmpeak="1153750" vmrss="590298" vmhwm="590298" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1160718" vmpeak="1262736" vmrss="405376" vmhwm="507317" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1898410" vmpeak="1898410" vmrss="1560884" vmhwm="1560884" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1240917" vmpeak="1326114" vmrss="419094" vmhwm="507306" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1594502" vmpeak="1679698" vmrss="1116954" vmhwm="1116954" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="991671" vmpeak="1004291" vmrss="275397" vmhwm="287918" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1616690" vmpeak="1618188" vmrss="1278908" vmhwm="1280494" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1258623" vmpeak="1258623" vmrss="284320" vmhwm="287606" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1303156" vmpeak="1388353" vmrss="824928" vmhwm="824928" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1134889" vmpeak="1188636" vmrss="367130" vmhwm="384935" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1865047" vmpeak="1865047" vmrss="1527947" vmhwm="1527947" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1220882" vmpeak="1306078" vmrss="376006" vmhwm="384217" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1551019" vmpeak="1636216" vmrss="1071928" vmhwm="1071928" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1255898" vmpeak="1437160" vmrss="461385" vmhwm="642049" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1800479" vmpeak="1945580" vmrss="1462780" vmhwm="1607470" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1530053" vmpeak="1530053" vmrss="505570" vmhwm="641368" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1561955" vmpeak="1619753" vmrss="1084324" vmhwm="1225473" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2833797" vmpeak="3516609" vmrss="1409798" vmhwm="2092417" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="GPU" vmsize="4293634" vmpeak="4293634" vmrss="3955525" vmhwm="3955525" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="CPU" vmsize="3022032" vmpeak="3516609" vmrss="2255333" vmhwm="2255333" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="GPU" vmsize="4277993" vmpeak="4363190" vmrss="3799333" vmhwm="3799333" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="CPU" vmsize="1066384" vmpeak="1233736" vmrss="390972" vmhwm="557528" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="GPU" vmsize="1358442" vmpeak="1615062" vmrss="1020947" vmhwm="1273121" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="CPU" vmsize="1243392" vmpeak="1328589" vmrss="398580" vmhwm="558469" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="GPU" vmsize="1256070" vmpeak="1398212" vmrss="778549" vmhwm="1001192" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437560" vmpeak="1625010" vmrss="754254" vmhwm="941142" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2281713" vmpeak="2410668" vmrss="1943780" vmhwm="2072428" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524473" vmpeak="1625005" vmrss="763001" vmhwm="940264" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2070671" vmpeak="2155868" vmrss="1593108" vmhwm="1719125" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="835629" vmpeak="889226" vmrss="164216" vmhwm="217245" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="983507" vmpeak="1024665" vmrss="645985" vmhwm="686930" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="927451" vmpeak="1012648" vmrss="168360" vmhwm="216569" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="900712" vmpeak="985909" vmrss="423519" vmhwm="463533" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="CPU" vmsize="980636" vmpeak="1099706" vmrss="296680" vmhwm="415194" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="GPU" vmsize="1326213" vmpeak="1409371" vmrss="988488" vmhwm="1071366" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="CPU" vmsize="1248691" vmpeak="1248691" vmrss="306857" vmhwm="414752" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="GPU" vmsize="1163032" vmpeak="1248228" vmrss="685843" vmhwm="765507" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189531" vmpeak="1393636" vmrss="513661" vmhwm="717204" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1866176" vmpeak="2002847" vmrss="1528664" vmhwm="1664577" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1457669" vmpeak="1457669" vmrss="523811" vmhwm="715837" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1606243" vmpeak="1691440" vmrss="1129185" vmhwm="1262534" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="CPU" vmsize="1521920" vmpeak="1894167" vmrss="814210" vmhwm="1185704" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="GPU" vmsize="1961772" vmpeak="2317998" vmrss="1623268" vmhwm="1979062" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="CPU" vmsize="1789325" vmpeak="1894157" vmrss="828328" vmhwm="1185480" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="GPU" vmsize="1951877" vmpeak="2240295" vmrss="1479337" vmhwm="1843041" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="CPU" vmsize="1427384" vmpeak="1755920" vmrss="719097" vmhwm="1047295" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="GPU" vmsize="2059070" vmpeak="2371101" vmrss="1721616" vmhwm="2033194" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="CPU" vmsize="1694035" vmpeak="1779232" vmrss="732596" vmhwm="1046208" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="GPU" vmsize="1863825" vmpeak="2084664" vmrss="1386002" vmhwm="1691248" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720959" vmpeak="795839" vmrss="98898" vmhwm="98898" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="749106" vmpeak="749106" vmrss="411049" vmhwm="411049" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="806941" vmpeak="806941" vmrss="104702" vmhwm="104702" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727818" vmpeak="813014" vmrss="252787" vmhwm="252787" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727116" vmpeak="793010" vmrss="92508" vmhwm="92508" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="817554" vmpeak="817554" vmrss="479762" vmhwm="479762" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="813108" vmpeak="898305" vmrss="99481" vmhwm="99481" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="765070" vmpeak="850267" vmrss="290040" vmhwm="290040" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="694023" vmrss="34377" vmhwm="34377" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631919" vmpeak="631919" vmrss="294070" vmhwm="294070" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="779532" vmpeak="864728" vmrss="36524" vmhwm="36524" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618586" vmpeak="703783" vmrss="140582" vmhwm="140582" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="783447" vmrss="42936" vmhwm="42936" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="724302" vmpeak="724302" vmrss="386261" vmhwm="386339" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="1070524" vmpeak="1155720" vmrss="129376" vmhwm="129376" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="762933" vmpeak="848130" vmrss="284216" vmhwm="284216" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30700" vmhwm="30700" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="588270" vmpeak="610240" vmrss="250692" vmhwm="269453" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="958042" vmpeak="958042" vmrss="30908" vmhwm="30908" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605176" vmpeak="690372" vmrss="127602" vmhwm="129365" />
- <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="CPU" vmsize="732747" vmpeak="732747" vmrss="146874" vmhwm="146874" />
- <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="GPU" vmsize="778096" vmpeak="778096" vmrss="439654" vmhwm="439654" />
- <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="CPU" vmsize="818864" vmpeak="904061" vmrss="148220" vmhwm="148220" />
- <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="GPU" vmsize="781279" vmpeak="866476" vmrss="323528" vmhwm="323528" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="CPU" vmsize="739559" vmpeak="739559" vmrss="67152" vmhwm="67152" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="GPU" vmsize="769938" vmpeak="769938" vmrss="431922" vmhwm="431922" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="CPU" vmsize="1007323" vmpeak="1007323" vmrss="99127" vmhwm="99127" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="GPU" vmsize="760047" vmpeak="845244" vmrss="281866" vmhwm="281866" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1219296" vmpeak="1440462" vmrss="513271" vmhwm="733850" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1693062" vmpeak="1898192" vmrss="1355270" vmhwm="1559838" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1305881" vmpeak="1440556" vmrss="527399" vmhwm="732924" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1500881" vmpeak="1620819" vmrss="1022845" vmhwm="1226721" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1406802" vmpeak="1704736" vmrss="687445" vmhwm="984760" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2147516" vmpeak="2429642" vmrss="1810073" vmhwm="2091382" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1674363" vmpeak="1759560" vmrss="702972" vmhwm="984744" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1849614" vmpeak="2046543" vmrss="1371458" vmhwm="1652222" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1218568" vmpeak="1439734" vmrss="513505" vmhwm="734136" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1688476" vmpeak="1897693" vmrss="1350502" vmhwm="1559168" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1305106" vmpeak="1439828" vmrss="526188" vmhwm="732721" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1498400" vmpeak="1619649" vmrss="1021170" vmhwm="1226201" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1406007" vmpeak="1703941" vmrss="687798" vmhwm="985082" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2132431" vmpeak="2419976" vmrss="1795331" vmhwm="2082298" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1673562" vmpeak="1758759" vmrss="702202" vmhwm="984557" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1852832" vmpeak="2055175" vmrss="1375025" vmhwm="1661046" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="CPU" vmsize="1214486" vmpeak="1422704" vmrss="531008" vmhwm="738576" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="GPU" vmsize="1653386" vmpeak="1850721" vmrss="1316047" vmhwm="1513090" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="CPU" vmsize="1307545" vmpeak="1422720" vmrss="553290" vmhwm="739018" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="GPU" vmsize="1505826" vmpeak="1597455" vmrss="1028154" vmhwm="1203888" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="CPU" vmsize="1639840" vmpeak="2058960" vmrss="933025" vmhwm="1351495" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="GPU" vmsize="2290340" vmpeak="2674006" vmrss="1952048" vmhwm="2335455" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="CPU" vmsize="1914021" vmpeak="2149482" vmrss="959363" vmhwm="1351006" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="GPU" vmsize="2119436" vmpeak="2416320" vmrss="1662554" vmhwm="2022462" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705806" vmpeak="780353" vmrss="52806" vmhwm="52806" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="700835" vmpeak="700835" vmrss="362949" vmhwm="362949" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791934" vmpeak="791934" vmrss="56794" vmhwm="56794" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674611" vmpeak="759808" vmrss="198120" vmhwm="198120" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046858" vmpeak="1178912" vmrss="308542" vmhwm="439483" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1226248" vmpeak="1247022" vmrss="889018" vmhwm="909454" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1140729" vmpeak="1225926" vmrss="372574" vmhwm="439826" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1199894" vmpeak="1285091" vmrss="724178" vmhwm="734505" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2134158" vmpeak="2836756" vmrss="1438309" vmhwm="2140715" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2781932" vmpeak="3912818" vmrss="2443178" vmhwm="3574105" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2220634" vmpeak="2836865" vmrss="1468797" vmhwm="2139722" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2790174" vmpeak="3834277" vmrss="2311826" vmhwm="3439888" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2189153" vmpeak="2918723" vmrss="1491048" vmhwm="2220868" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2869105" vmpeak="4001228" vmrss="2531100" vmhwm="3662869" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2366254" vmpeak="2918817" vmrss="1523605" vmhwm="2221388" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877716" vmpeak="3944751" vmrss="2400091" vmhwm="3551449" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3569482" vmpeak="4949084" vmrss="2797106" vmhwm="4176364" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4819713" vmpeak="6984764" vmrss="4481042" vmhwm="6645126" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3929790" vmpeak="4858536" vmrss="2814931" vmhwm="4176198" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4816962" vmpeak="6932770" vmrss="4337715" vmhwm="6538006" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="1172662" vmpeak="1401509" vmrss="491966" vmhwm="720564" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1345822" vmpeak="1585391" vmrss="1008384" vmhwm="1247916" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1442381" vmpeak="1442381" vmrss="510697" vmhwm="720267" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1348219" vmpeak="1513917" vmrss="870485" vmhwm="1120215" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="CPU" vmsize="1106159" vmpeak="1204460" vmrss="268408" vmhwm="366470" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="GPU" vmsize="1568190" vmpeak="1568190" vmrss="1230538" vmhwm="1230538" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="CPU" vmsize="1395617" vmpeak="1395617" vmrss="399692" vmhwm="399692" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="GPU" vmsize="1513621" vmpeak="1598818" vmrss="1035897" vmhwm="1035897" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="CPU" vmsize="1108187" vmpeak="1206488" vmrss="271648" vmhwm="369590" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="GPU" vmsize="2870816" vmpeak="2870816" vmrss="1290972" vmhwm="1290972" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="CPU" vmsize="1396408" vmpeak="1396408" vmrss="396172" vmhwm="396172" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="GPU" vmsize="2778490" vmpeak="2863686" vmrss="2307058" vmhwm="2307058" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="979706" vmpeak="1098692" vmrss="295682" vmhwm="414247" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1303499" vmpeak="1390069" vmrss="965224" vmhwm="1051580" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1247750" vmpeak="1247750" vmrss="307928" vmhwm="415266" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1160265" vmpeak="1245462" vmrss="682354" vmhwm="766100" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304610" vmhwm="430336" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1170265" vmpeak="1281675" vmrss="833180" vmhwm="944299" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1253189" vmpeak="1253189" vmrss="316373" vmhwm="429618" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1091214" vmpeak="1176411" vmrss="613095" vmhwm="724110" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304772" vmhwm="430414" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="GPU" vmsize="1150806" vmpeak="1261878" vmrss="813394" vmhwm="924123" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="CPU" vmsize="1253194" vmpeak="1253194" vmrss="315463" vmhwm="428974" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="GPU" vmsize="1090070" vmpeak="1175267" vmrss="612274" vmhwm="722924" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="705577" vmpeak="780457" vmrss="53320" vmhwm="53320" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="716476" vmpeak="716476" vmrss="378487" vmhwm="378487" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="972613" vmpeak="1057810" vmrss="57033" vmhwm="57033" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="672594" vmpeak="757790" vmrss="194183" vmhwm="194183" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="CPU" vmsize="1863586" vmpeak="2298270" vmrss="1166578" vmhwm="1601236" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="GPU" vmsize="3438385" vmpeak="3992487" vmrss="3100890" vmhwm="3654268" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="CPU" vmsize="2136893" vmpeak="2298270" vmrss="1177888" vmhwm="1601350" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="GPU" vmsize="2866156" vmpeak="3332056" vmrss="2390778" vmhwm="2939315" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="CPU" vmsize="1795970" vmpeak="2230654" vmrss="1095978" vmhwm="1530557" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="GPU" vmsize="3373229" vmpeak="3883687" vmrss="3035104" vmhwm="3545068" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="CPU" vmsize="2069298" vmpeak="2230675" vmrss="1108967" vmhwm="1530178" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="GPU" vmsize="2783367" vmpeak="3206626" vmrss="2308222" vmhwm="2813283" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="CPU" vmsize="1389767" vmpeak="1653657" vmrss="587459" vmhwm="851136" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="GPU" vmsize="1997091" vmpeak="1999374" vmrss="1659538" vmhwm="1661498" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="CPU" vmsize="1660250" vmpeak="1660250" vmrss="717350" vmhwm="850948" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="GPU" vmsize="1842703" vmpeak="1927900" vmrss="1363991" vmhwm="1363991" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="CPU" vmsize="783562" vmpeak="783562" vmrss="74089" vmhwm="74089" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="GPU" vmsize="976300" vmpeak="976300" vmrss="639132" vmhwm="639132" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="CPU" vmsize="1055204" vmpeak="1140401" vmrss="135018" vmhwm="135018" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="GPU" vmsize="895616" vmpeak="980813" vmrss="418631" vmhwm="418631" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903520" vmpeak="903520" vmrss="182405" vmhwm="182405" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1300780" vmpeak="1300780" vmrss="963144" vmhwm="963144" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1261171" vmpeak="1346368" vmrss="191354" vmhwm="191354" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1066088" vmpeak="1151285" vmrss="588608" vmhwm="588608" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992097" vmpeak="1004718" vmrss="276021" vmhwm="288532" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1673510" vmpeak="1686178" vmrss="1335256" vmhwm="1346415" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259304" vmpeak="1259304" vmrss="285667" vmhwm="288584" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1318803" vmpeak="1404000" vmrss="840652" vmhwm="840652" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="CPU" vmsize="742190" vmpeak="801429" vmrss="120036" vmhwm="120036" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="GPU" vmsize="917155" vmpeak="917155" vmrss="580470" vmhwm="580470" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="CPU" vmsize="828079" vmpeak="828079" vmrss="124950" vmhwm="124950" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="GPU" vmsize="798803" vmpeak="884000" vmrss="322223" vmhwm="322223" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="CPU" vmsize="1036542" vmpeak="1123340" vmrss="332675" vmhwm="418984" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="GPU" vmsize="1419095" vmpeak="1503018" vmrss="1081142" vmhwm="1164966" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="CPU" vmsize="1122513" vmpeak="1207710" vmrss="333564" vmhwm="417877" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="GPU" vmsize="1206654" vmpeak="1291851" vmrss="729799" vmhwm="812141" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2502557" vmpeak="2710479" vmrss="803394" vmhwm="1011098" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4844647" vmpeak="4844647" vmrss="4505820" vmhwm="4505820" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="927518" vmpeak="990735" vmrss="192327" vmhwm="255424" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1410156" vmpeak="1410156" vmrss="1071818" vmhwm="1071818" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1348308" vmpeak="1587736" vmrss="555162" vmhwm="794456" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2073328" vmpeak="2139914" vmrss="1735650" vmhwm="1801794" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="CPU" vmsize="1137926" vmpeak="1282252" vmrss="347172" vmhwm="491384" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="GPU" vmsize="1528581" vmpeak="1558133" vmrss="1191273" vmhwm="1220918" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="CPU" vmsize="1064445" vmpeak="1124276" vmrss="233131" vmhwm="292728" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="GPU" vmsize="1608666" vmpeak="1608666" vmrss="1270744" vmhwm="1270744" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="CPU" vmsize="1209941" vmpeak="1295138" vmrss="396422" vmhwm="396422" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="GPU" vmsize="1593238" vmpeak="1678435" vmrss="1137583" vmhwm="1257484" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713814" vmpeak="788028" vmrss="53034" vmhwm="53034" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="701729" vmpeak="701729" vmrss="363578" vmhwm="363578" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="799869" vmpeak="885066" vmrss="59810" vmhwm="59810" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="687694" vmpeak="772891" vmrss="209248" vmhwm="209248" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="706258" vmpeak="780140" vmrss="52884" vmhwm="52884" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="705052" vmpeak="705052" vmrss="367395" vmhwm="367395" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="973367" vmpeak="1058564" vmrss="56414" vmhwm="56414" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="677320" vmpeak="762517" vmrss="198619" vmhwm="198619" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437061" vmpeak="1624516" vmrss="755024" vmhwm="942141" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2478034" vmpeak="2597150" vmrss="2139680" vmhwm="2258219" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524120" vmpeak="1624521" vmrss="762559" vmhwm="940914" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2100274" vmpeak="2185471" vmrss="1622847" vmhwm="1739566" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="748534" vmpeak="809437" vmrss="143514" vmhwm="143514" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="943758" vmpeak="943758" vmrss="606392" vmhwm="606392" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1015783" vmpeak="1015783" vmrss="147118" vmhwm="147118" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="835073" vmpeak="920270" vmrss="357146" vmhwm="357146" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="834953" vmpeak="887541" vmrss="164626" vmhwm="217001" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1034649" vmpeak="1064835" vmrss="696592" vmhwm="726694" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="921081" vmpeak="1006278" vmrss="167502" vmhwm="215597" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="911310" vmpeak="996507" vmrss="433617" vmhwm="464682" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="971453" vmpeak="1081683" vmrss="305390" vmhwm="415204" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1332598" vmpeak="1413375" vmrss="995165" vmhwm="1075859" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1148685" vmpeak="1233882" vmrss="314220" vmhwm="414882" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1167634" vmpeak="1252830" vmrss="689416" vmhwm="769002" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189630" vmpeak="1393740" vmrss="511908" vmhwm="715540" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1867418" vmpeak="2007080" vmrss="1529990" vmhwm="1668929" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1367256" vmpeak="1452453" vmrss="523946" vmhwm="715577" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1611350" vmpeak="1696546" vmrss="1133615" vmhwm="1270427" />
- <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2715268" vmpeak="3061650" vmrss="776375" vmhwm="1122695" />
- <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4160156" vmpeak="4971210" vmrss="3823164" vmhwm="4634151" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="CPU" vmsize="701350" vmpeak="776562" vmrss="42281" vmhwm="42281" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="GPU" vmsize="717771" vmpeak="717771" vmrss="379501" vmhwm="379501" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="CPU" vmsize="786552" vmpeak="786552" vmrss="42406" vmhwm="42406" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="GPU" vmsize="656084" vmpeak="741280" vmrss="177543" vmhwm="177543" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="CPU" vmsize="705936" vmpeak="781149" vmrss="55619" vmhwm="55619" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="GPU" vmsize="724765" vmpeak="724765" vmrss="386458" vmhwm="386458" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="CPU" vmsize="791554" vmpeak="791554" vmrss="55582" vmhwm="55582" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="GPU" vmsize="670987" vmpeak="756184" vmrss="193029" vmhwm="193029" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="720673" vmpeak="720673" vmrss="99512" vmhwm="99512" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="771253" vmpeak="771253" vmrss="433087" vmhwm="433087" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="987828" vmpeak="1073025" vmrss="104005" vmhwm="104005" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="726986" vmpeak="812182" vmrss="248450" vmhwm="248450" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="726554" vmpeak="793447" vmrss="91452" vmhwm="91452" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="857027" vmpeak="857027" vmrss="519630" vmhwm="519630" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="812619" vmpeak="897816" vmrss="100895" vmhwm="100895" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="764800" vmpeak="849997" vmrss="287019" vmhwm="287019" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="CPU" vmsize="739960" vmpeak="739960" vmrss="134924" vmhwm="134924" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="GPU" vmsize="905439" vmpeak="905439" vmrss="567876" vmhwm="567876" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="CPU" vmsize="825988" vmpeak="891722" vmrss="144684" vmhwm="144684" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="GPU" vmsize="821251" vmpeak="906448" vmrss="343085" vmhwm="343085" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="CPU" vmsize="1026407" vmpeak="1026407" vmrss="351535" vmhwm="351535" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="GPU" vmsize="1104485" vmpeak="1149496" vmrss="766740" vmhwm="811642" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="CPU" vmsize="1209280" vmpeak="1209280" vmrss="362325" vmhwm="362325" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="GPU" vmsize="1105275" vmpeak="1190472" vmrss="627822" vmhwm="671450" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="CPU" vmsize="988072" vmpeak="1114146" vmrss="304798" vmhwm="430279" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="GPU" vmsize="1171383" vmpeak="1282325" vmrss="833705" vmhwm="944476" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="CPU" vmsize="1164982" vmpeak="1250178" vmrss="319394" vmhwm="429904" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="GPU" vmsize="1090481" vmpeak="1115056" vmrss="613485" vmhwm="722176" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1185163" vmpeak="1406329" vmrss="511669" vmhwm="732674" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1646897" vmpeak="1857653" vmrss="1308538" vmhwm="1518940" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1361906" vmpeak="1447102" vmrss="515138" vmhwm="731073" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1486612" vmpeak="1612171" vmrss="1008602" vmhwm="1218973" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1361328" vmpeak="1659262" vmrss="685287" vmhwm="983091" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2053204" vmpeak="2340951" vmrss="1714788" vmhwm="2002072" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628504" vmpeak="1713701" vmrss="690892" vmhwm="983257" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1817290" vmpeak="2019841" vmrss="1338792" vmhwm="1625405" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="980148" vmpeak="1106211" vmrss="304340" vmhwm="430242" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1177410" vmpeak="1291040" vmrss="839217" vmhwm="952868" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1060997" vmpeak="1146194" vmrss="308906" vmhwm="429811" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1094189" vmpeak="1123038" vmrss="616548" vmhwm="730298" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1217086" vmpeak="1438262" vmrss="515611" vmhwm="736502" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1721532" vmpeak="1922648" vmrss="1383304" vmhwm="1584195" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1394296" vmpeak="1479493" vmrss="530197" vmhwm="735883" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1533625" vmpeak="1649492" vmrss="1055813" vmhwm="1256236" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1664005" vmpeak="1929070" vmrss="791611" vmhwm="988280" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2054062" vmpeak="2324472" vmrss="1715776" vmhwm="1985344" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1750642" vmpeak="1750642" vmrss="806811" vmhwm="988041" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1905020" vmpeak="2088814" vmrss="1426682" vmhwm="1694347" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="CPU" vmsize="994541" vmpeak="1120615" vmrss="307034" vmhwm="432806" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="GPU" vmsize="1212042" vmpeak="1312194" vmrss="874780" vmhwm="974438" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="CPU" vmsize="1081334" vmpeak="1166531" vmrss="322436" vmhwm="432702" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="GPU" vmsize="1116720" vmpeak="1132315" vmrss="638097" vmhwm="738348" />
- <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1467762" vmpeak="1671108" vmrss="691412" vmhwm="894509" />
- <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2625381" vmpeak="2732168" vmrss="2288915" vmhwm="2392494" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="713590" vmpeak="788138" vmrss="53216" vmhwm="53216" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724427" vmpeak="724427" vmrss="386354" vmhwm="386354" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="799604" vmpeak="799604" vmrss="59534" vmhwm="59534" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="685677" vmpeak="770874" vmrss="206845" vmhwm="206845" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="CPU" vmsize="832010" vmpeak="832010" vmrss="144367" vmhwm="144367" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="GPU" vmsize="920249" vmpeak="920249" vmrss="582769" vmhwm="582769" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="CPU" vmsize="1009200" vmpeak="1094397" vmrss="156052" vmhwm="156052" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="GPU" vmsize="851666" vmpeak="936863" vmrss="374660" vmhwm="374660" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="CPU" vmsize="1357855" vmpeak="1537842" vmrss="428038" vmhwm="602841" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="GPU" vmsize="1748255" vmpeak="1748255" vmrss="1410474" vmhwm="1410474" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="CPU" vmsize="1539933" vmpeak="1625130" vmrss="506157" vmhwm="602326" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="GPU" vmsize="1597762" vmpeak="1597762" vmrss="1125956" vmhwm="1125956" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="CPU" vmsize="1508566" vmpeak="1688554" vmrss="427086" vmhwm="602414" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="GPU" vmsize="1694071" vmpeak="1694071" vmrss="1356300" vmhwm="1356300" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="CPU" vmsize="1418346" vmpeak="1507495" vmrss="498206" vmhwm="602238" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="GPU" vmsize="1533370" vmpeak="1618567" vmrss="1062006" vmhwm="1062006" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="912147" vmpeak="990698" vmrss="224068" vmhwm="302484" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1144707" vmpeak="1222395" vmrss="807570" vmhwm="885076" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="CPU" vmsize="998842" vmpeak="1048663" vmrss="239059" vmhwm="302291" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="GPU" vmsize="1054336" vmpeak="1139533" vmrss="577106" vmhwm="651913" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="CPU" vmsize="1046905" vmpeak="1206301" vmrss="351400" vmhwm="510603" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="GPU" vmsize="1199005" vmpeak="1333363" vmrss="861400" vmhwm="995815" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="CPU" vmsize="1132003" vmpeak="1217200" vmrss="380998" vmhwm="509615" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="GPU" vmsize="1174336" vmpeak="1259533" vmrss="696300" vmhwm="857849" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133768" vmpeak="2836366" vmrss="1437966" vmhwm="2140403" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2803710" vmpeak="3934762" vmrss="2464961" vmhwm="3596054" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2400741" vmpeak="2836230" vmrss="1468438" vmhwm="2139410" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2793221" vmpeak="3855737" vmrss="2313766" vmhwm="3461135" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188924" vmpeak="2918494" vmrss="1491630" vmhwm="2221008" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2899624" vmpeak="4031731" vmrss="2561410" vmhwm="3693086" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2274792" vmpeak="2918401" vmrss="1523438" vmhwm="2221039" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877160" vmpeak="3966222" vmrss="2398546" vmhwm="3572186" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1252357" vmpeak="1511010" vmrss="552931" vmhwm="811361" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1481464" vmpeak="1701512" vmrss="1144072" vmhwm="1363939" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1340471" vmpeak="1510438" vmrss="585192" vmhwm="810186" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1465339" vmpeak="1601189" vmrss="987604" vmhwm="1207902" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="CPU" vmsize="872019" vmpeak="952447" vmrss="192904" vmhwm="272953" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="GPU" vmsize="876340" vmpeak="970054" vmrss="538460" vmhwm="632299" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="CPU" vmsize="959992" vmpeak="1045189" vmrss="207662" vmhwm="273093" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="GPU" vmsize="883292" vmpeak="968489" vmrss="405891" vmhwm="476907" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="CPU" vmsize="1248988" vmpeak="1505738" vmrss="549031" vmhwm="805745" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="GPU" vmsize="1459816" vmpeak="1681716" vmrss="1121952" vmhwm="1343638" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="CPU" vmsize="1337055" vmpeak="1506221" vmrss="582212" vmhwm="806447" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="GPU" vmsize="1456322" vmpeak="1589104" vmrss="977688" vmhwm="1194798" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388498" vmpeak="1700405" vmrss="680981" vmhwm="992706" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1904952" vmpeak="2102276" vmrss="1567898" vmhwm="1764921" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1486066" vmpeak="1705636" vmrss="724443" vmhwm="992409" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1809121" vmpeak="1916995" vmrss="1331512" vmhwm="1523137" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="CPU" vmsize="803400" vmpeak="848244" vmrss="123765" vmhwm="168360" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="GPU" vmsize="795683" vmpeak="825796" vmrss="458718" vmhwm="488498" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="CPU" vmsize="892273" vmpeak="977470" vmrss="139048" vmhwm="168292" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="GPU" vmsize="789438" vmpeak="874634" vmrss="312400" vmhwm="338832" />
+
</models>
</attributes>
\ No newline at end of file
<value>GPU</value>
</devices>
<models>
- <value>caffe/FP32/alexnet/alexnet.xml</value>
- <value>caffe/FP32/caffenet/caffenet.xml</value>
- <value>caffe/FP32/densenet_121/densenet_121.xml</value>
- <value>caffe/FP32/densenet_161/densenet_161.xml</value>
- <value>caffe/FP32/densenet_169/densenet_169.xml</value>
- <value>caffe/FP32/densenet_201/densenet_201.xml</value>
- <value>caffe/FP32/dpn_92/dpn_92.xml</value>
- <value>caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
- <value>caffe/FP32/inception_v1/inception_v1.xml</value>
- <value>caffe/FP32/inception_v2/inception_v2.xml</value>
- <value>caffe/FP32/inception_v3/inception_v3.xml</value>
- <value>caffe/FP32/inception_v4/inception_v4.xml</value>
- <value>caffe/FP32/lenet/lenet.xml</value>
- <value>caffe/FP32/mobilenet/mobilenet.xml</value>
- <value>caffe/FP32/mobilenet_v2/mobilenet_v2.xml</value>
- <value>caffe/FP32/resnet_18/resnet_18.xml</value>
- <value>caffe/FP32/resnet_v1_50/resnet_v1_50.xml</value>
- <value>caffe/FP32/resnet_v1_101/resnet_v1_101.xml</value>
- <value>caffe/FP32/resnet_v1_152/resnet_v1_152.xml</value>
- <value>caffe/FP32/resnet_v1_269/resnet_v1_269.xml</value>
- <value>caffe/FP32/se_resnext_50/se_resnext_50.xml</value>
- <value>caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml</value>
- <value>caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
- <value>caffe/FP32/ssd_googlenet/ssd_googlenet.xml</value>
- <value>caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml</value>
- <value>caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml</value>
- <value>caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
- <value>caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml</value>
- <value>caffe/FP32/vgg16/vgg16.xml</value>
- <value>caffe/FP32/vgg19/vgg19.xml</value>
- <value>caffe/FP32/wrn_50_2/wrn_50_2.xml</value>
- <value>caffe/FP32/yolo_v1_full/yolo_v1_full.xml</value>
- <value>caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
- <value>caffe/FP32/yolo_v2/yolo_v2.xml</value>
- <value>caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml</value>
- <value>caffe/FP32/yolo_v3/yolo_v3.xml</value>
- <value>caffe/FP32/dilation/dilation.xml</value>
- <value>caffe/FP32/dssd/dssd.xml</value>
- <value>caffe/FP32/fcn8/fcn8.xml</value>
- <value>caffe/FP32/fcn32/fcn32.xml</value>
- <value>caffe/FP32/fcn_alexnet/fcn_alexnet.xml</value>
- <value>caffe/FP32/mtcnn_p/mtcnn_p.xml</value>
- <value>caffe/FP32/mtcnn_r/mtcnn_r.xml</value>
- <value>caffe/FP32/mtcnn_o/mtcnn_o.xml</value>
- <value>caffe/FP32/openpose_face/openpose_face.xml</value>
- <value>caffe/FP32/openpose_hand/openpose_hand.xml</value>
- <value>caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml</value>
- <value>caffe/FP32/places205_alexnet/places205_alexnet.xml</value>
- <value>caffe/FP32/places205_googlenet/places205_googlenet.xml</value>
- <value>caffe/FP32/se_bn_inception/se_bn_inception.xml</value>
- <value>caffe/FP32/vnect/vnect.xml</value>
- <value>tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml</value>
- <value>tf/1.14.0/FP32/bert_xnli/bert_xnli.xml</value>
- <value>tf/1.14.0/FP32/cmu/cmu.xml</value>
- <value>tf/1.14.0/FP32/densenet_121/densenet_121.xml</value>
- <value>tf/1.14.0/FP32/densenet_169/densenet_169.xml</value>
- <value>tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml</value>
- <value>tf/1.14.0/FP32/east/east.xml</value>
- <value>tf/1.14.0/FP32/facenet/facenet.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml</value>
- <value>tf/1.14.0/FP32/gnmt/gnmt.xml</value>
- <value>tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml</value>
- <value>tf/1.14.0/FP32/inception_v1/inception_v1.xml</value>
- <value>tf/1.14.0/FP32/inception_v2/inception_v2.xml</value>
- <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
- <value>tf/1.14.0/FP32/inception_v4/inception_v4.xml</value>
- <value>tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
- <value>tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml</value>
- <value>tf/1.14.0/FP32/ncf/ncf.xml</value>
- <value>tf/1.14.0/FP32/nasnet-a_large/nasnet-a_large.xml</value>
- <value>tf/1.14.0/FP32/nasnet-a_mobile/nasnet-a_mobile.xml</value>
- <value>tf/1.14.0/FP32/pnasnet-5_large/pnasnet-5_large.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml</value>
- <value>tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml</value>
- <value>tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml</value>
- <value>tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml</value>
- <value>tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml</value>
- <value>tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml</value>
- <value>tf/1.14.0/FP32/unet2d/unet2d.xml</value>
- <value>tf/1.14.0/FP32/vgg16/vgg16.xml</value>
- <value>tf/1.14.0/FP32/vgg19/vgg19.xml</value>
- <value>tf/1.14.0/FP32/yolo_v2/yolo_v2.xml</value>
- <value>tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml</value>
- <value>tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml</value>
- <value>tf/1.14.0/FP32/yolo_v3/yolo_v3.xml</value>
- <value>tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml</value>
- <value>tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml</value>
- <value>tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml</value>
- <value>tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml</value>
- <value>mxnet/FP32/caffenet/caffenet.xml</value>
- <value>mxnet/FP32/densenet_121/densenet_121.xml</value>
- <value>mxnet/FP32/densenet_161/densenet_161.xml</value>
- <value>mxnet/FP32/densenet_169/densenet_169.xml</value>
- <value>mxnet/FP32/densenet_201/densenet_201.xml</value>
- <value>mxnet/FP32/inception_v3/inception_v3.xml</value>
- <value>mxnet/FP32/inception_v4/inception_v4.xml</value>
- <value>mxnet/FP32/mobilenet/mobilenet.xml</value>
- <value>mxnet/FP32/mobilenet_v2/mobilenet_v2.xml</value>
- <value>mxnet/FP32/resnet_v1_101/resnet_v1_101.xml</value>
- <value>mxnet/FP32/resnet_v1_152/resnet_v1_152.xml</value>
- <value>mxnet/FP32/resnet_v2_101/resnet_v2_101.xml</value>
- <value>mxnet/FP32/resnet_v2_152/resnet_v2_152.xml</value>
- <value>mxnet/FP32/resnext_101/resnext_101.xml</value>
- <value>mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
- <value>mxnet/FP32/ssd_inception_v3_512/ssd_inception_v3_512.xml</value>
- <value>mxnet/FP32/ssd_mobilenet_512/ssd_mobilenet_512.xml</value>
- <value>mxnet/FP32/ssd_resnet50_512/ssd_resnet50_512.xml</value>
- <value>mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
- <value>mxnet/FP32/vgg16/vgg16.xml</value>
- <value>mxnet/FP32/vgg19/vgg19.xml</value>
- <value>mxnet/FP32/dpn_92/dpn_92.xml</value>
- <value>mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml</value>
- <value>mxnet/FP32/full_imagenet_network/full_imagenet_network.xml</value>
- <value>mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
- <value>mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml</value>
- <value>mxnet/FP32/location_net/location_net.xml</value>
- <value>mxnet/FP32/lresnet100e/lresnet100e.xml</value>
- <value>mxnet/FP32/mtcnn_p/mtcnn_p.xml</value>
- <value>mxnet/FP32/mtcnn_r/mtcnn_r.xml</value>
- <value>mxnet/FP32/mtcnn_o/mtcnn_o.xml</value>
- <value>mxnet/FP32/nin/nin.xml</value>
- <value>mxnet/FP32/nst_vgg19/nst_vgg19.xml</value>
- <value>mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml</value>
- <value>mxnet/FP32/yolo_v1_full/yolo_v1_full.xml</value>
- <value>mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
- <value>onnx/FP32/ssd_resnet34/ssd_resnet34.xml</value>
- <value>onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml</value>
- <value>onnx/FP32/retina_net/retina_net.xml</value>
- <value>pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml</value>
- <value>pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml</value>
- <value>pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml</value>
- <value>pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml</value>
+ <value>public/mobilenet-v2-1.4-224/FP32/mobilenet-v2-1.4-224.xml</value>
+ <value>public/resnet-101/FP32/resnet-101.xml</value>
+ <value>public/brain-tumor-segmentation-0001/FP32/brain-tumor-segmentation-0001.xml</value>
+ <value>public/octave-resnet-101-0.125/FP32/octave-resnet-101-0.125.xml</value>
+ <value>public/faster_rcnn_inception_resnet_v2_atrous_coco/FP32/faster_rcnn_inception_resnet_v2_atrous_coco.xml</value>
+ <value>public/efficientnet-b7_auto_aug/FP32/efficientnet-b7_auto_aug.xml</value>
+ <value>public/yolo-v2-tf/FP32/yolo-v2-tf.xml</value>
+ <value>public/mobilenet-v2-1.0-224/FP32/mobilenet-v2-1.0-224.xml</value>
+ <value>public/colorization-v2-norebal/FP32/colorization-v2-norebal.xml</value>
+ <value>public/se-inception/FP32/se-inception.xml</value>
+ <value>public/efficientnet-b0/FP32/efficientnet-b0.xml</value>
+ <value>public/mobilenet-v1-1.0-224-tf/FP32/mobilenet-v1-1.0-224-tf.xml</value>
+ <value>public/mask_rcnn_resnet101_atrous_coco/FP32/mask_rcnn_resnet101_atrous_coco.xml</value>
+ <value>public/ssd_mobilenet_v1_coco/FP32/ssd_mobilenet_v1_coco.xml</value>
+ <value>public/se-resnet-152/FP32/se-resnet-152.xml</value>
+ <value>public/octave-resnext-50-0.25/FP32/octave-resnext-50-0.25.xml</value>
+ <value>public/googlenet-v3/FP32/googlenet-v3.xml</value>
+ <value>public/ssd_mobilenet_v2_coco/FP32/ssd_mobilenet_v2_coco.xml</value>
+ <value>public/alexnet/FP32/alexnet.xml</value>
+ <value>public/license-plate-recognition-barrier-0007/FP32/license-plate-recognition-barrier-0007.xml</value>
+ <value>public/mobilenet-v1-0.50-224/FP32/mobilenet-v1-0.50-224.xml</value>
+ <value>public/ssd_mobilenet_v1_fpn_coco/FP32/ssd_mobilenet_v1_fpn_coco.xml</value>
+ <value>public/vgg16/FP32/vgg16.xml</value>
+ <value>public/face-recognition-resnet34-arcface/FP32/face-recognition-resnet34-arcface.xml</value>
+ <value>public/gmcnn-places2-tf/FP32/gmcnn-places2-tf.xml</value>
+ <value>public/mobilenet-v1-1.0-224/FP32/mobilenet-v1-1.0-224.xml</value>
+ <value>public/se-resnet-101/FP32/se-resnet-101.xml</value>
+ <value>public/face-detection-retail-0044/FP32/face-detection-retail-0044.xml</value>
+ <value>public/face-recognition-mobilefacenet-arcface/FP32/face-recognition-mobilefacenet-arcface.xml</value>
+ <value>public/vehicle-license-plate-detection-barrier-0123/FP32/vehicle-license-plate-detection-barrier-0123.xml</value>
+ <value>public/densenet-161/FP32/densenet-161.xml</value>
+ <value>public/mask_rcnn_inception_resnet_v2_atrous_coco/FP32/mask_rcnn_inception_resnet_v2_atrous_coco.xml</value>
+ <value>public/octave-resnext-101-0.25/FP32/octave-resnext-101-0.25.xml</value>
+ <value>public/face-recognition-resnet50-arcface/FP32/face-recognition-resnet50-arcface.xml</value>
+ <value>public/densenet-161-tf/FP32/densenet-161-tf.xml</value>
+ <value>public/octave-resnet-200-0.125/FP32/octave-resnet-200-0.125.xml</value>
+ <value>public/mtcnn-p/FP32/mtcnn-p.xml</value>
+ <value>public/se-resnext-101/FP32/se-resnext-101.xml</value>
+ <value>public/efficientnet-b5/FP32/efficientnet-b5.xml</value>
+ <value>public/densenet-169-tf/FP32/densenet-169-tf.xml</value>
+ <value>public/densenet-201/FP32/densenet-201.xml</value>
+ <value>public/resnet-50-tf/FP32/resnet-50-tf.xml</value>
+ <value>public/squeezenet1.1/FP32/squeezenet1.1.xml</value>
+ <value>public/squeezenet1.0/FP32/squeezenet1.0.xml</value>
+ <value>public/octave-resnet-26-0.25/FP32/octave-resnet-26-0.25.xml</value>
+ <value>public/googlenet-v4-tf/FP32/googlenet-v4-tf.xml</value>
+ <value>public/ssd300/FP32/ssd300.xml</value>
+ <value>public/rfcn-resnet101-coco-tf/FP32/rfcn-resnet101-coco-tf.xml</value>
+ <value>public/vgg19/FP32/vgg19.xml</value>
+ <value>public/ctdet_coco_dlav0_384/FP32/ctdet_coco_dlav0_384.xml</value>
+ <value>public/efficientnet-b0_auto_aug/FP32/efficientnet-b0_auto_aug.xml</value>
+ <value>public/googlenet-v1/FP32/googlenet-v1.xml</value>
+ <value>public/faster_rcnn_inception_v2_coco/FP32/faster_rcnn_inception_v2_coco.xml</value>
+ <value>public/mask_rcnn_inception_v2_coco/FP32/mask_rcnn_inception_v2_coco.xml</value>
+ <value>public/inception-resnet-v2-tf/FP32/inception-resnet-v2-tf.xml</value>
+ <value>public/deeplabv3/FP32/deeplabv3.xml</value>
+ <value>public/yolo-v3-tf/FP32/yolo-v3-tf.xml</value>
+ <value>public/resnet-152/FP32/resnet-152.xml</value>
+ <value>public/mtcnn-o/FP32/mtcnn-o.xml</value>
+ <value>public/octave-se-resnet-50-0.125/FP32/octave-se-resnet-50-0.125.xml</value>
+ <value>public/yolo-v1-tiny-tf/FP32/yolo-v1-tiny-tf.xml</value>
+ <value>public/resnet-50/FP32/resnet-50.xml</value>
+ <value>public/googlenet-v1-tf/FP32/googlenet-v1-tf.xml</value>
+ <value>public/yolo-v2-tiny-tf/FP32/yolo-v2-tiny-tf.xml</value>
+ <value>public/ssd512/FP32/ssd512.xml</value>
+ <value>public/densenet-169/FP32/densenet-169.xml</value>
+ <value>public/brain-tumor-segmentation-0002/FP32/brain-tumor-segmentation-0002.xml</value>
+ <value>public/Sphereface/FP32/Sphereface.xml</value>
+ <value>public/googlenet-v2/FP32/googlenet-v2.xml</value>
+ <value>public/face-recognition-resnet100-arcface/FP32/face-recognition-resnet100-arcface.xml</value>
+ <value>public/mobilenet-v1-0.25-128/FP32/mobilenet-v1-0.25-128.xml</value>
+ <value>public/ctdet_coco_dlav0_512/FP32/ctdet_coco_dlav0_512.xml</value>
+ <value>public/facenet-20180408-102900/FP32/facenet-20180408-102900.xml</value>
+ <value>public/ctpn/FP32/ctpn.xml</value>
+ <value>public/ssdlite_mobilenet_v2/FP32/ssdlite_mobilenet_v2.xml</value>
+ <value>public/i3d-rgb-tf/FP32/i3d-rgb-tf.xml</value>
+ <value>public/mobilenet-v2/FP32/mobilenet-v2.xml</value>
+ <value>public/mobilenet-ssd/FP32/mobilenet-ssd.xml</value>
+ <value>public/se-resnext-50/FP32/se-resnext-50.xml</value>
+ <value>public/caffenet/FP32/caffenet.xml</value>
+ <value>public/mtcnn-r/FP32/mtcnn-r.xml</value>
+ <value>public/faster_rcnn_resnet50_coco/FP32/faster_rcnn_resnet50_coco.xml</value>
+ <value>public/se-resnet-50/FP32/se-resnet-50.xml</value>
+ <value>public/mask_rcnn_resnet50_atrous_coco/FP32/mask_rcnn_resnet50_atrous_coco.xml</value>
+ <value>public/octave-resnet-50-0.125/FP32/octave-resnet-50-0.125.xml</value>
+ <value>public/densenet-121-tf/FP32/densenet-121-tf.xml</value>
+ <value>public/mobilenet-v1-0.50-160/FP32/mobilenet-v1-0.50-160.xml</value>
+ <value>public/densenet-121/FP32/densenet-121.xml</value>
+ <value>public/faster_rcnn_resnet101_coco/FP32/faster_rcnn_resnet101_coco.xml</value>
+ <value>public/octave-densenet-121-0.125/FP32/octave-densenet-121-0.125.xml</value>
+ <value>public/colorization-v2/FP32/colorization-v2.xml</value>
+ <value>public/densenet-121-caffe2/FP32/densenet-121-caffe2.xml</value>
+ <value>public/efficientnet-b0-pytorch/FP32/efficientnet-b0-pytorch.xml</value>
+ <value>public/efficientnet-b5-pytorch/FP32/efficientnet-b5-pytorch.xml</value>
+ <value>public/efficientnet-b7-pytorch/FP32/efficientnet-b7-pytorch.xml</value>
+ <value>public/googlenet-v3-pytorch/FP32/googlenet-v3-pytorch.xml</value>
+ <value>public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.xml</value>
+ <value>public/midasnet/FP32/midasnet.xml</value>
+ <value>public/mobilenet-v2-pytorch/FP32/mobilenet-v2-pytorch.xml</value>
+ <value>public/resnet-18-pytorch/FP32/resnet-18-pytorch.xml</value>
+ <value>public/resnet-50-caffe2/FP32/resnet-50-caffe2.xml</value>
+ <value>public/resnet-50-pytorch/FP32/resnet-50-pytorch.xml</value>
+ <value>public/single-human-pose-estimation-0001/FP32/single-human-pose-estimation-0001.xml</value>
+ <value>public/squeezenet1.1-caffe2/FP32/squeezenet1.1-caffe2.xml</value>
+ <value>public/vgg19-caffe2/FP32/vgg19-caffe2.xml</value>
+ <value>intel/facial-landmarks-35-adas-0002/FP32/facial-landmarks-35-adas-0002.xml</value>
+ <value>intel/vehicle-attributes-recognition-barrier-0039/FP32/vehicle-attributes-recognition-barrier-0039.xml</value>
+ <value>intel/person-detection-action-recognition-0006/FP32/person-detection-action-recognition-0006.xml</value>
+ <value>intel/asl-recognition-0004/FP32/asl-recognition-0004.xml</value>
+ <value>intel/yolo-v2-tiny-ava-sparse-30-0001/FP32/yolo-v2-tiny-ava-sparse-30-0001.xml</value>
+ <value>intel/text-detection-0004/FP32/text-detection-0004.xml</value>
+ <value>intel/person-vehicle-bike-detection-crossroad-1016/FP32/person-vehicle-bike-detection-crossroad-1016.xml</value>
+ <value>intel/text-spotting-0002-detector/FP32/text-spotting-0002-detector.xml</value>
+ <value>intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013.xml</value>
+ <value>intel/vehicle-detection-adas-0002/FP32/vehicle-detection-adas-0002.xml</value>
+ <value>intel/image-retrieval-0001/FP32/image-retrieval-0001.xml</value>
+ <value>intel/person-detection-retail-0002/FP32/person-detection-retail-0002.xml</value>
+ <value>intel/person-attributes-recognition-crossroad-0230/FP32/person-attributes-recognition-crossroad-0230.xml</value>
+ <value>intel/face-detection-0100/FP32/face-detection-0100.xml</value>
+ <value>intel/face-detection-0102/FP32/face-detection-0102.xml</value>
+ <value>intel/person-reidentification-retail-0031/FP32/person-reidentification-retail-0031.xml</value>
+ <value>intel/person-reidentification-retail-0300/FP32/person-reidentification-retail-0300.xml</value>
+ <value>intel/instance-segmentation-security-0010/FP32/instance-segmentation-security-0010.xml</value>
+ <value>intel/instance-segmentation-security-0083/FP32/instance-segmentation-security-0083.xml</value>
+ <value>intel/face-detection-0105/FP32/face-detection-0105.xml</value>
+ <value>intel/face-detection-0104/FP32/face-detection-0104.xml</value>
+ <value>intel/icnet-camvid-ava-sparse-30-0001/FP32/icnet-camvid-ava-sparse-30-0001.xml</value>
+ <value>intel/action-recognition-0001-decoder/FP32/action-recognition-0001-decoder.xml</value>
+ <value>intel/face-detection-0106/FP32/face-detection-0106.xml</value>
+ <value>intel/person-detection-action-recognition-teacher-0002/FP32/person-detection-action-recognition-teacher-0002.xml</value>
+ <value>intel/person-vehicle-bike-detection-crossroad-0078/FP32/person-vehicle-bike-detection-crossroad-0078.xml</value>
+ <value>intel/icnet-camvid-ava-sparse-60-0001/FP32/icnet-camvid-ava-sparse-60-0001.xml</value>
+ <value>intel/face-detection-adas-0001/FP32/face-detection-adas-0001.xml</value>
+ <value>intel/unet-camvid-onnx-0001/FP32/unet-camvid-onnx-0001.xml</value>
+ <value>intel/human-pose-estimation-0001/FP32/human-pose-estimation-0001.xml</value>
+ <value>intel/faster-rcnn-resnet101-coco-sparse-60-0001/FP32/faster-rcnn-resnet101-coco-sparse-60-0001.xml</value>
+ <value>intel/action-recognition-0001-encoder/FP32/action-recognition-0001-encoder.xml</value>
+ <value>intel/yolo-v2-ava-sparse-35-0001/FP32/yolo-v2-ava-sparse-35-0001.xml</value>
+ <value>intel/yolo-v2-ava-sparse-70-0001/FP32/yolo-v2-ava-sparse-70-0001.xml</value>
+ <value>intel/person-reidentification-retail-0248/FP32/person-reidentification-retail-0248.xml</value>
+ <value>intel/person-detection-raisinghand-recognition-0001/FP32/person-detection-raisinghand-recognition-0001.xml</value>
+ <value>intel/person-detection-asl-0001/FP32/person-detection-asl-0001.xml</value>
+ <value>intel/emotions-recognition-retail-0003/FP32/emotions-recognition-retail-0003.xml</value>
+ <value>intel/yolo-v2-tiny-ava-0001/FP32/yolo-v2-tiny-ava-0001.xml</value>
+ <value>intel/license-plate-recognition-barrier-0001/FP32/license-plate-recognition-barrier-0001.xml</value>
+ <value>intel/person-detection-retail-0013/FP32/person-detection-retail-0013.xml</value>
+ <value>intel/instance-segmentation-security-0050/FP32/instance-segmentation-security-0050.xml</value>
+ <value>intel/single-image-super-resolution-1032/FP32/single-image-super-resolution-1032.xml</value>
+ <value>intel/landmarks-regression-retail-0009/FP32/landmarks-regression-retail-0009.xml</value>
+ <value>intel/driver-action-recognition-adas-0002-decoder/FP32/driver-action-recognition-adas-0002-decoder.xml</value>
+ <value>intel/person-reidentification-retail-0249/FP32/person-reidentification-retail-0249.xml</value>
+ <value>intel/text-spotting-0002-recognizer-decoder/FP32/text-spotting-0002-recognizer-decoder.xml</value>
+ <value>intel/yolo-v2-ava-0001/FP32/yolo-v2-ava-0001.xml</value>
+ <value>intel/person-detection-action-recognition-0005/FP32/person-detection-action-recognition-0005.xml</value>
+ <value>intel/text-recognition-0012/FP32/text-recognition-0012.xml</value>
+ <value>intel/face-detection-retail-0004/FP32/face-detection-retail-0004.xml</value>
+ <value>intel/product-detection-0001/FP32/product-detection-0001.xml</value>
+ <value>intel/yolo-v2-tiny-ava-sparse-60-0001/FP32/yolo-v2-tiny-ava-sparse-60-0001.xml</value>
+ <value>intel/face-reidentification-retail-0095/FP32/face-reidentification-retail-0095.xml</value>
+ <value>intel/road-segmentation-adas-0001/FP32/road-segmentation-adas-0001.xml</value>
+ <value>intel/single-image-super-resolution-1033/FP32/single-image-super-resolution-1033.xml</value>
+ <value>intel/face-detection-retail-0005/FP32/face-detection-retail-0005.xml</value>
+ <value>intel/pedestrian-and-vehicle-detector-adas-0001/FP32/pedestrian-and-vehicle-detector-adas-0001.xml</value>
+ <value>intel/handwritten-japanese-recognition-0001/FP32/handwritten-japanese-recognition-0001.xml</value>
+ <value>intel/semantic-segmentation-adas-0001/FP32/semantic-segmentation-adas-0001.xml</value>
+ <value>intel/pedestrian-detection-adas-0002/FP32/pedestrian-detection-adas-0002.xml</value>
+ <value>intel/driver-action-recognition-adas-0002-encoder/FP32/driver-action-recognition-adas-0002-encoder.xml</value>
+ <value>intel/text-detection-0003/FP32/text-detection-0003.xml</value>
+ <value>intel/text-spotting-0002-recognizer-encoder/FP32/text-spotting-0002-recognizer-encoder.xml</value>
+ <value>intel/handwritten-score-recognition-0003/FP32/handwritten-score-recognition-0003.xml</value>
+ <value>intel/icnet-camvid-ava-0001/FP32/icnet-camvid-ava-0001.xml</value>
+ <value>intel/text-image-super-resolution-0001/FP32/text-image-super-resolution-0001.xml</value>
+ <value>intel/gaze-estimation-adas-0002/FP32/gaze-estimation-adas-0002.xml</value>
+ <value>intel/head-pose-estimation-adas-0001/FP32/head-pose-estimation-adas-0001.xml</value>
+ <value>intel/vehicle-license-plate-detection-barrier-0106/FP32/vehicle-license-plate-detection-barrier-0106.xml</value>
+ <value>intel/instance-segmentation-security-1025/FP32/instance-segmentation-security-1025.xml</value>
</models>
</attributes>
\ No newline at end of file
<?xml version="1.0"?>
<attributes>
<models>
-<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="CPU" vmsize="757218" vmpeak="901683" vmrss="73920" vmhwm="107866" />
-<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="GPU" vmsize="747815" vmpeak="860978" vmrss="401808" vmhwm="435358" />
-<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="CPU" vmsize="1001189" vmpeak="1001189" vmrss="116080" vmhwm="116080" />
-<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="GPU" vmsize="788752" vmpeak="860842" vmrss="435283" vmhwm="435283" />
-<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="CPU" vmsize="754806" vmpeak="803184" vmrss="15206" vmhwm="26532" />
-<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="GPU" vmsize="554650" vmpeak="644666" vmrss="207592" vmhwm="217720" />
-<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="CPU" vmsize="959257" vmpeak="959257" vmrss="26690" vmhwm="26690" />
-<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="GPU" vmsize="572576" vmpeak="644666" vmrss="215230" vmhwm="215230" />
-<model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="CPU" vmsize="755224" vmpeak="1146142" vmrss="22246" vmhwm="370770" />
-<model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="GPU" vmsize="747709" vmpeak="1031694" vmrss="401746" vmhwm="749962" />
-<model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="CPU" vmsize="1343474" vmpeak="1415563" vmrss="314204" vmhwm="371131" />
-<model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="GPU" vmsize="1088700" vmpeak="1160790" vmrss="739626" vmhwm="748008" />
-<model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="754050" vmpeak="2548532" vmrss="15593" vmhwm="1808765" />
-<model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="648912" vmpeak="3289101" vmrss="299327" vmhwm="3003457" />
-<model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2257006" vmpeak="2548532" vmrss="1243448" vmhwm="1809143" />
-<model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2413290" vmpeak="3289101" vmrss="2059780" vmhwm="3006845" />
+ <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="CPU" vmsize="740214" vmpeak="805110" vmrss="129308" vmhwm="129308" />
+ <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="GPU" vmsize="922147" vmpeak="922147" vmrss="587522" vmhwm="587522" />
+ <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="CPU" vmsize="1007890" vmpeak="1007890" vmrss="138652" vmhwm="138652" />
+ <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="GPU" vmsize="1006439" vmpeak="1091636" vmrss="587241" vmhwm="587241" />
+ <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="CPU" vmsize="691589" vmpeak="922864" vmrss="31054" vmhwm="31054" />
+ <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="GPU" vmsize="626194" vmpeak="626194" vmrss="290695" vmhwm="290695" />
+ <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="CPU" vmsize="958240" vmpeak="1043437" vmrss="31366" vmhwm="31366" />
+ <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="GPU" vmsize="708734" vmpeak="793930" vmrss="287877" vmhwm="287877" />
+ <model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="CPU" vmsize="1046988" vmpeak="1179042" vmrss="307990" vmhwm="439457" />
+ <model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="GPU" vmsize="1267775" vmpeak="1279647" vmrss="932672" vmhwm="944626" />
+ <model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="CPU" vmsize="1321819" vmpeak="1321819" vmrss="374207" vmhwm="439748" />
+ <model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="GPU" vmsize="1356565" vmpeak="1441762" vmrss="941418" vmhwm="947060" />
+ <model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133814" vmpeak="2836412" vmrss="1438049" vmhwm="2140533" />
+ <model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2801422" vmpeak="3915366" vmrss="2465065" vmhwm="3578811" />
+ <model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2401380" vmpeak="2836412" vmrss="1469832" vmhwm="2140377" />
+ <model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2892432" vmpeak="3939166" vmrss="2472017" vmhwm="3602924" />
</models>
</attributes>
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>${STRESS_IRS_PATH}</value>
+ </irs_path>
+</attributes>
<?xml version="1.0"?>
<attributes>
<models>
- <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1321668" vmpeak="1631245" vmrss="657919" vmhwm="967408" />
- <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1563796" vmpeak="2064987" vmrss="1227532" vmhwm="1728485" />
- <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1589073" vmpeak="1631151" vmrss="659287" vmhwm="966721" />
- <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1557202" vmpeak="1973197" vmrss="1079972" vmhwm="1580035" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1341314" vmpeak="1650890" vmrss="665329" vmhwm="974724" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1591844" vmpeak="1793074" vmrss="1255238" vmhwm="1456566" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1441388" vmpeak="1650797" vmrss="682999" vmhwm="973897" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1605884" vmpeak="1696297" vmrss="1128160" vmhwm="1303270" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903562" vmpeak="903562" vmrss="180684" vmhwm="180684" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1301939" vmpeak="1301939" vmrss="964126" vmhwm="964126" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170582" vmpeak="1255779" vmrss="189836" vmhwm="189836" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1057290" vmpeak="1142486" vmrss="582316" vmhwm="582316" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1155512" vmpeak="1257531" vmrss="406551" vmhwm="508289" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1884636" vmpeak="1884636" vmrss="1547655" vmhwm="1547655" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1241500" vmpeak="1326696" vmrss="419666" vmhwm="506740" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1583504" vmpeak="1668700" vmrss="1108941" vmhwm="1108941" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992170" vmpeak="1004790" vmrss="275704" vmhwm="288189" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1487241" vmpeak="1487241" vmrss="1150458" vmhwm="1150458" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259122" vmpeak="1259122" vmrss="283545" vmhwm="286317" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1294259" vmpeak="1379456" vmrss="819712" vmhwm="819712" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1135388" vmpeak="1188803" vmrss="366688" vmhwm="384436" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1903132" vmpeak="1903132" vmrss="1341693" vmhwm="1509783" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1221381" vmpeak="1306578" vmrss="376038" vmhwm="384514" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1517360" vmpeak="1602556" vmrss="1041424" vmhwm="1041424" />
- <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="CPU" vmsize="2658385" vmpeak="3374820" vmrss="1479264" vmhwm="2195507" />
- <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="GPU" vmsize="3398751" vmpeak="3980990" vmrss="3009406" vmhwm="3589695" />
- <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="CPU" vmsize="2763358" vmpeak="3374727" vmrss="1996228" vmhwm="2195658" />
- <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="GPU" vmsize="3381653" vmpeak="3900676" vmrss="2904111" vmhwm="3506760" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1254858" vmpeak="1436120" vmrss="461666" vmhwm="642226" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1880288" vmpeak="2024947" vmrss="1544847" vmhwm="1688965" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1529008" vmpeak="1529008" vmrss="505601" vmhwm="640972" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1560561" vmpeak="1620039" vmrss="1084423" vmhwm="1227179" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1467497" vmpeak="1765602" vmrss="637795" vmhwm="935719" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1611261" vmpeak="2008177" vmrss="1219769" vmhwm="1615723" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1771364" vmpeak="1771364" vmrss="805464" vmhwm="935511" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1605936" vmpeak="1895415" vmrss="1127750" vmhwm="1502191" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1436468" vmpeak="1623923" vmrss="753001" vmhwm="940030" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2477649" vmpeak="2606604" vmrss="1727107" vmhwm="1917645" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1704596" vmpeak="1704596" vmrss="763807" vmhwm="939510" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2069168" vmpeak="2154365" vmrss="1592208" vmhwm="1718236" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="755742" vmpeak="920202" vmrss="149593" vmhwm="149593" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="941834" vmpeak="941834" vmrss="605690" vmhwm="605690" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1012616" vmpeak="1012616" vmrss="154793" vmhwm="154793" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="827018" vmpeak="912215" vmrss="350012" vmhwm="350012" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="833872" vmpeak="886454" vmrss="162780" vmhwm="214853" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1017369" vmpeak="1055308" vmrss="681980" vmhwm="719721" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="920363" vmpeak="1005560" vmrss="167133" vmhwm="214895" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="898206" vmpeak="983403" vmrss="419707" vmhwm="455660" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="980382" vmpeak="1099368" vmrss="295952" vmhwm="414325" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1322427" vmpeak="1407354" vmrss="987646" vmhwm="1072141" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1248421" vmpeak="1248421" vmrss="307860" vmhwm="415298" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1158170" vmpeak="1243366" vmrss="680934" vmhwm="763703" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1188829" vmpeak="1392934" vmrss="513037" vmhwm="716632" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1859291" vmpeak="1997377" vmrss="1524088" vmhwm="1661504" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1456962" vmpeak="1456962" vmrss="521965" vmhwm="715650" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1605110" vmpeak="1690306" vmrss="1127874" vmhwm="1262539" />
- <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="CPU" vmsize="694122" vmpeak="774706" vmrss="35958" vmhwm="35958" />
- <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="GPU" vmsize="617312" vmpeak="617312" vmrss="281574" vmhwm="281574" />
- <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="CPU" vmsize="961006" vmpeak="1046203" vmrss="35443" vmhwm="35443" />
- <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="GPU" vmsize="610729" vmpeak="695926" vmrss="132324" vmhwm="132324" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720948" vmpeak="795828" vmrss="98992" vmhwm="98992" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="770952" vmpeak="770952" vmrss="435333" vmhwm="435333" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="987984" vmpeak="1073181" vmrss="103136" vmhwm="103136" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727896" vmpeak="813092" vmrss="252522" vmhwm="252522" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727100" vmpeak="727100" vmrss="92372" vmhwm="92372" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="858800" vmpeak="858800" vmrss="523712" vmhwm="523712" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="994151" vmpeak="1079348" vmrss="100588" vmhwm="100588" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="763750" vmpeak="848946" vmrss="288984" vmhwm="288984" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="774893" vmrss="34673" vmhwm="34673" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631940" vmpeak="631940" vmrss="288189" vmhwm="288189" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="960580" vmpeak="1045777" vmrss="35604" vmhwm="35604" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618436" vmpeak="703632" vmrss="140368" vmhwm="140368" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="866314" vmrss="43825" vmhwm="43825" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="728395" vmpeak="756038" vmrss="383780" vmhwm="410545" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="979997" vmpeak="979997" vmrss="128320" vmhwm="128320" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="763287" vmpeak="848484" vmrss="284648" vmhwm="284648" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30253" vmhwm="30253" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="520577" vmpeak="523374" vmrss="126614" vmhwm="129084" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="963367" vmpeak="1048564" vmrss="33337" vmhwm="33337" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605597" vmpeak="690794" vmrss="128091" vmhwm="129911" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="CPU" vmsize="1141790" vmpeak="1336405" vmrss="431813" vmhwm="626236" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="GPU" vmsize="1443811" vmpeak="1566063" vmrss="1055756" vmhwm="1177592" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="CPU" vmsize="1409517" vmpeak="1409517" vmrss="472004" vmhwm="625461" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="GPU" vmsize="1361157" vmpeak="1446354" vmrss="883168" vmhwm="1005030" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="CPU" vmsize="1125716" vmpeak="1312344" vmrss="413764" vmhwm="600215" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="GPU" vmsize="1426141" vmpeak="1538960" vmrss="1037488" vmhwm="1149792" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="CPU" vmsize="1212156" vmpeak="1312438" vmrss="455239" vmhwm="601276" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="GPU" vmsize="1337679" vmpeak="1365301" vmrss="859944" vmhwm="972233" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="CPU" vmsize="1299688" vmpeak="1563577" vmrss="586242" vmhwm="849924" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="GPU" vmsize="1812174" vmpeak="1997912" vmrss="1424103" vmhwm="1609166" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="CPU" vmsize="1386018" vmpeak="1563577" vmrss="626147" vmhwm="849420" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="GPU" vmsize="1652414" vmpeak="1755286" vmrss="1174087" vmhwm="1361599" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1287572" vmpeak="1580612" vmrss="624582" vmhwm="917441" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1513813" vmpeak="1998531" vmrss="1151737" vmhwm="1636216" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1464517" vmpeak="1580597" vmrss="626922" vmhwm="916905" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1498551" vmpeak="1889992" vmrss="1020489" vmhwm="1496653" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="746007" vmpeak="746007" vmrss="136240" vmhwm="136240" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="926957" vmpeak="926957" vmrss="577309" vmhwm="577309" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1013547" vmpeak="1013547" vmrss="142885" vmhwm="142885" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="813794" vmpeak="898991" vmrss="336570" vmhwm="336570" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="CPU" vmsize="824631" vmpeak="897722" vmrss="151590" vmhwm="210714" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="GPU" vmsize="838567" vmpeak="891956" vmrss="503739" vmhwm="557273" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="CPU" vmsize="910988" vmpeak="996184" vmrss="158886" vmhwm="211936" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="GPU" vmsize="818776" vmpeak="903973" vmrss="341322" vmhwm="391955" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1184934" vmpeak="1406100" vmrss="511170" vmhwm="731827" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1640386" vmpeak="1850810" vmrss="1305855" vmhwm="1515966" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1452578" vmpeak="1452578" vmrss="518258" vmhwm="732508" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1479166" vmpeak="1604392" vmrss="1000901" vmhwm="1210248" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1360918" vmpeak="1658852" vmrss="684892" vmhwm="982316" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2023595" vmpeak="2311010" vmrss="1620923" vmhwm="1906216" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628577" vmpeak="1713774" vmrss="691672" vmhwm="982930" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1814176" vmpeak="2016393" vmrss="1336238" vmhwm="1622244" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="CPU" vmsize="2119015" vmpeak="2465268" vmrss="1307748" vmhwm="1653490" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="GPU" vmsize="3063808" vmpeak="3522360" vmrss="2673543" vmhwm="3130623" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="CPU" vmsize="2386618" vmpeak="2465538" vmrss="1321663" vmhwm="1652372" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="GPU" vmsize="2799269" vmpeak="3172618" vmrss="2321664" vmhwm="2777736" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="974698" vmpeak="1100762" vmrss="304220" vmhwm="429774" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1173671" vmpeak="1286625" vmrss="838682" vmhwm="951636" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1242233" vmpeak="1242233" vmrss="310086" vmhwm="429150" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1090726" vmpeak="1175922" vmrss="613813" vmhwm="726200" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="CPU" vmsize="870022" vmpeak="924336" vmrss="179088" vmhwm="232892" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="GPU" vmsize="1125753" vmpeak="1166344" vmrss="786666" vmhwm="827138" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="CPU" vmsize="1137541" vmpeak="1137541" vmrss="184485" vmhwm="232949" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="GPU" vmsize="955177" vmpeak="1040374" vmrss="477032" vmhwm="519178" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="CPU" vmsize="1074985" vmpeak="1208168" vmrss="344406" vmhwm="477089" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="GPU" vmsize="1383397" vmpeak="1496918" vmrss="980408" vmhwm="1092702" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="CPU" vmsize="1168200" vmpeak="1253397" vmrss="374275" vmhwm="477698" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="GPU" vmsize="1240657" vmpeak="1325854" vmrss="762725" vmhwm="854386" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713351" vmpeak="787898" vmrss="52858" vmhwm="52858" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="719794" vmpeak="719794" vmrss="384508" vmhwm="384508" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="980522" vmpeak="980522" vmrss="59456" vmhwm="59456" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="686613" vmpeak="771810" vmrss="211426" vmhwm="211426" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705796" vmpeak="705796" vmrss="52405" vmhwm="52405" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724984" vmpeak="724984" vmrss="390031" vmhwm="390031" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791918" vmpeak="877115" vmrss="56269" vmhwm="56269" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674590" vmpeak="759787" vmrss="199139" vmhwm="199139" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="850278" vmpeak="901976" vmrss="168672" vmhwm="218660" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="1092353" vmpeak="1123298" vmrss="689566" vmhwm="762699" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1118015" vmpeak="1118015" vmrss="177444" vmhwm="218670" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="944564" vmpeak="1029761" vmrss="467672" vmhwm="495326" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="740069" vmpeak="740069" vmrss="128315" vmhwm="128315" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="791986" vmpeak="791986" vmrss="456830" vmhwm="456830" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="1188891" vmpeak="1274088" vmrss="138252" vmhwm="138252" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="819218" vmpeak="904415" vmrss="342066" vmhwm="342066" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="CPU" vmsize="740714" vmpeak="803946" vmrss="126521" vmhwm="126521" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="GPU" vmsize="925225" vmpeak="925225" vmrss="519417" vmhwm="586206" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="CPU" vmsize="1008446" vmpeak="1093643" vmrss="135714" vmhwm="135714" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="GPU" vmsize="824470" vmpeak="909667" vmrss="348103" vmhwm="348103" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046843" vmpeak="1178897" vmrss="308848" vmhwm="440377" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1151961" vmpeak="1168070" vmrss="815692" vmhwm="831932" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1321751" vmpeak="1321751" vmrss="373412" vmhwm="440299" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1200820" vmpeak="1286017" vmrss="725717" vmhwm="734500" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="CPU" vmsize="1186697" vmpeak="1322895" vmrss="323164" vmhwm="457116" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="GPU" vmsize="1522606" vmpeak="1522606" vmrss="1120277" vmhwm="1120277" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="CPU" vmsize="1288424" vmpeak="1373621" vmrss="500370" vmhwm="500370" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="GPU" vmsize="1449448" vmpeak="1534644" vmrss="973845" vmhwm="973845" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133809" vmpeak="2836407" vmrss="1438444" vmhwm="2140850" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2707359" vmpeak="3834188" vmrss="2314816" vmhwm="3441464" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2401339" vmpeak="3101945" vmrss="1469098" vmhwm="2139987" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2792654" vmpeak="3834136" vmrss="2314577" vmhwm="3440408" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188804" vmpeak="2918375" vmrss="1492623" vmhwm="2222001" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2898989" vmpeak="4025117" vmrss="2481081" vmhwm="3626459" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2275379" vmpeak="2918474" vmrss="1523834" vmhwm="2221715" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2876250" vmpeak="3944834" vmrss="2398682" vmhwm="3551002" />
- <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="CPU" vmsize="873480" vmpeak="943924" vmrss="196320" vmhwm="266656" />
- <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="GPU" vmsize="1067367" vmpeak="1101604" vmrss="730048" vmhwm="764051" />
- <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="CPU" vmsize="961745" vmpeak="1046942" vmrss="212149" vmhwm="266546" />
- <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="GPU" vmsize="976471" vmpeak="1061668" vmrss="499335" vmhwm="528736" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="CPU" vmsize="1428580" vmpeak="1776923" vmrss="741670" vmhwm="1089587" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="GPU" vmsize="1842729" vmpeak="2177494" vmrss="1452183" vmhwm="1785934" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="CPU" vmsize="1514890" vmpeak="1776834" vmrss="756730" vmhwm="1088464" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="GPU" vmsize="1753476" vmpeak="2003045" vmrss="1275523" vmhwm="1608807" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3478618" vmpeak="4858219" vmrss="2796794" vmhwm="4176062" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4842442" vmpeak="6987687" vmrss="4397738" vmhwm="6544928" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3567340" vmpeak="4858193" vmrss="2814666" vmhwm="4176177" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4814217" vmpeak="6932785" vmrss="4335193" vmhwm="6538194" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="998956" vmpeak="1136428" vmrss="307600" vmhwm="444735" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1052719" vmpeak="1232316" vmrss="717854" vmhwm="897540" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1258004" vmpeak="1258004" vmrss="326175" vmhwm="443996" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1059619" vmpeak="1138789" vmrss="582155" vmhwm="745664" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1249211" vmpeak="1506304" vmrss="550752" vmhwm="807762" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1492743" vmpeak="1714642" vmrss="1095354" vmhwm="1316988" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1427483" vmpeak="1512680" vmrss="582514" vmhwm="806858" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1456343" vmpeak="1595287" vmrss="978369" vmhwm="1201579" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="CPU" vmsize="871930" vmpeak="952359" vmrss="193388" vmhwm="273634" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="GPU" vmsize="878768" vmpeak="973180" vmrss="533348" vmhwm="627848" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="CPU" vmsize="959909" vmpeak="1045106" vmrss="208156" vmhwm="273530" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="GPU" vmsize="883818" vmpeak="969014" vmrss="406442" vmhwm="476595" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388405" vmpeak="1700311" vmrss="680352" vmhwm="991998" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1970503" vmpeak="2164422" vmrss="1583935" vmhwm="1777209" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1661649" vmpeak="1746846" vmrss="723148" vmhwm="991354" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1812694" vmpeak="1917910" vmrss="1335609" vmhwm="1524931" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1321320" vmpeak="1630896" vmrss="658730" vmhwm="968125" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1563660" vmpeak="2064852" vmrss="1226097" vmhwm="1727050" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1679251" vmpeak="1849645" vmrss="659406" vmhwm="966815" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1557181" vmpeak="1973176" vmrss="1079998" vmhwm="1579983" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="908549" vmpeak="908549" vmrss="180804" vmhwm="180804" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1315620" vmpeak="1315620" vmrss="978213" vmhwm="978213" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170239" vmpeak="1255436" vmrss="189326" vmhwm="189326" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1068553" vmpeak="1153750" vmrss="590298" vmhwm="590298" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1160718" vmpeak="1262736" vmrss="405376" vmhwm="507317" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1898410" vmpeak="1898410" vmrss="1560884" vmhwm="1560884" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1240917" vmpeak="1326114" vmrss="419094" vmhwm="507306" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1594502" vmpeak="1679698" vmrss="1116954" vmhwm="1116954" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="991671" vmpeak="1004291" vmrss="275397" vmhwm="287918" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1616690" vmpeak="1618188" vmrss="1278908" vmhwm="1280494" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1258623" vmpeak="1258623" vmrss="284320" vmhwm="287606" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1303156" vmpeak="1388353" vmrss="824928" vmhwm="824928" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1134889" vmpeak="1188636" vmrss="367130" vmhwm="384935" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1865047" vmpeak="1865047" vmrss="1527947" vmhwm="1527947" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1220882" vmpeak="1306078" vmrss="376006" vmhwm="384217" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1551019" vmpeak="1636216" vmrss="1071928" vmhwm="1071928" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1255898" vmpeak="1437160" vmrss="461385" vmhwm="642049" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1800479" vmpeak="1945580" vmrss="1462780" vmhwm="1607470" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1530053" vmpeak="1530053" vmrss="505570" vmhwm="641368" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1561955" vmpeak="1619753" vmrss="1084324" vmhwm="1225473" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2833797" vmpeak="3516609" vmrss="1409798" vmhwm="2092417" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="GPU" vmsize="4293634" vmpeak="4293634" vmrss="3955525" vmhwm="3955525" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="CPU" vmsize="3022032" vmpeak="3516609" vmrss="2255333" vmhwm="2255333" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="GPU" vmsize="4277993" vmpeak="4363190" vmrss="3799333" vmhwm="3799333" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="CPU" vmsize="1066384" vmpeak="1233736" vmrss="390972" vmhwm="557528" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="GPU" vmsize="1358442" vmpeak="1615062" vmrss="1020947" vmhwm="1273121" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="CPU" vmsize="1243392" vmpeak="1328589" vmrss="398580" vmhwm="558469" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="GPU" vmsize="1256070" vmpeak="1398212" vmrss="778549" vmhwm="1001192" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437560" vmpeak="1625010" vmrss="754254" vmhwm="941142" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2281713" vmpeak="2410668" vmrss="1943780" vmhwm="2072428" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524473" vmpeak="1625005" vmrss="763001" vmhwm="940264" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2070671" vmpeak="2155868" vmrss="1593108" vmhwm="1719125" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="835629" vmpeak="889226" vmrss="164216" vmhwm="217245" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="983507" vmpeak="1024665" vmrss="645985" vmhwm="686930" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="927451" vmpeak="1012648" vmrss="168360" vmhwm="216569" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="900712" vmpeak="985909" vmrss="423519" vmhwm="463533" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="CPU" vmsize="980636" vmpeak="1099706" vmrss="296680" vmhwm="415194" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="GPU" vmsize="1326213" vmpeak="1409371" vmrss="988488" vmhwm="1071366" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="CPU" vmsize="1248691" vmpeak="1248691" vmrss="306857" vmhwm="414752" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="GPU" vmsize="1163032" vmpeak="1248228" vmrss="685843" vmhwm="765507" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189531" vmpeak="1393636" vmrss="513661" vmhwm="717204" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1866176" vmpeak="2002847" vmrss="1528664" vmhwm="1664577" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1457669" vmpeak="1457669" vmrss="523811" vmhwm="715837" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1606243" vmpeak="1691440" vmrss="1129185" vmhwm="1262534" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="CPU" vmsize="1521920" vmpeak="1894167" vmrss="814210" vmhwm="1185704" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="GPU" vmsize="1961772" vmpeak="2317998" vmrss="1623268" vmhwm="1979062" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="CPU" vmsize="1789325" vmpeak="1894157" vmrss="828328" vmhwm="1185480" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="GPU" vmsize="1951877" vmpeak="2240295" vmrss="1479337" vmhwm="1843041" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="CPU" vmsize="1427384" vmpeak="1755920" vmrss="719097" vmhwm="1047295" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="GPU" vmsize="2059070" vmpeak="2371101" vmrss="1721616" vmhwm="2033194" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="CPU" vmsize="1694035" vmpeak="1779232" vmrss="732596" vmhwm="1046208" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="GPU" vmsize="1863825" vmpeak="2084664" vmrss="1386002" vmhwm="1691248" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720959" vmpeak="795839" vmrss="98898" vmhwm="98898" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="749106" vmpeak="749106" vmrss="411049" vmhwm="411049" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="806941" vmpeak="806941" vmrss="104702" vmhwm="104702" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727818" vmpeak="813014" vmrss="252787" vmhwm="252787" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727116" vmpeak="793010" vmrss="92508" vmhwm="92508" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="817554" vmpeak="817554" vmrss="479762" vmhwm="479762" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="813108" vmpeak="898305" vmrss="99481" vmhwm="99481" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="765070" vmpeak="850267" vmrss="290040" vmhwm="290040" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="694023" vmrss="34377" vmhwm="34377" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631919" vmpeak="631919" vmrss="294070" vmhwm="294070" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="779532" vmpeak="864728" vmrss="36524" vmhwm="36524" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618586" vmpeak="703783" vmrss="140582" vmhwm="140582" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="783447" vmrss="42936" vmhwm="42936" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="724302" vmpeak="724302" vmrss="386261" vmhwm="386339" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="1070524" vmpeak="1155720" vmrss="129376" vmhwm="129376" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="762933" vmpeak="848130" vmrss="284216" vmhwm="284216" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30700" vmhwm="30700" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="588270" vmpeak="610240" vmrss="250692" vmhwm="269453" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="958042" vmpeak="958042" vmrss="30908" vmhwm="30908" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605176" vmpeak="690372" vmrss="127602" vmhwm="129365" />
- <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="CPU" vmsize="732747" vmpeak="732747" vmrss="146874" vmhwm="146874" />
- <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="GPU" vmsize="778096" vmpeak="778096" vmrss="439654" vmhwm="439654" />
- <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="CPU" vmsize="818864" vmpeak="904061" vmrss="148220" vmhwm="148220" />
- <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="GPU" vmsize="781279" vmpeak="866476" vmrss="323528" vmhwm="323528" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="CPU" vmsize="739559" vmpeak="739559" vmrss="67152" vmhwm="67152" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="GPU" vmsize="769938" vmpeak="769938" vmrss="431922" vmhwm="431922" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="CPU" vmsize="1007323" vmpeak="1007323" vmrss="99127" vmhwm="99127" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="GPU" vmsize="760047" vmpeak="845244" vmrss="281866" vmhwm="281866" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1219296" vmpeak="1440462" vmrss="513271" vmhwm="733850" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1693062" vmpeak="1898192" vmrss="1355270" vmhwm="1559838" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1305881" vmpeak="1440556" vmrss="527399" vmhwm="732924" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1500881" vmpeak="1620819" vmrss="1022845" vmhwm="1226721" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1406802" vmpeak="1704736" vmrss="687445" vmhwm="984760" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2147516" vmpeak="2429642" vmrss="1810073" vmhwm="2091382" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1674363" vmpeak="1759560" vmrss="702972" vmhwm="984744" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1849614" vmpeak="2046543" vmrss="1371458" vmhwm="1652222" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1218568" vmpeak="1439734" vmrss="513505" vmhwm="734136" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1688476" vmpeak="1897693" vmrss="1350502" vmhwm="1559168" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1305106" vmpeak="1439828" vmrss="526188" vmhwm="732721" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1498400" vmpeak="1619649" vmrss="1021170" vmhwm="1226201" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1406007" vmpeak="1703941" vmrss="687798" vmhwm="985082" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2132431" vmpeak="2419976" vmrss="1795331" vmhwm="2082298" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1673562" vmpeak="1758759" vmrss="702202" vmhwm="984557" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1852832" vmpeak="2055175" vmrss="1375025" vmhwm="1661046" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="CPU" vmsize="1214486" vmpeak="1422704" vmrss="531008" vmhwm="738576" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="GPU" vmsize="1653386" vmpeak="1850721" vmrss="1316047" vmhwm="1513090" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="CPU" vmsize="1307545" vmpeak="1422720" vmrss="553290" vmhwm="739018" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="GPU" vmsize="1505826" vmpeak="1597455" vmrss="1028154" vmhwm="1203888" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="CPU" vmsize="1639840" vmpeak="2058960" vmrss="933025" vmhwm="1351495" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="GPU" vmsize="2290340" vmpeak="2674006" vmrss="1952048" vmhwm="2335455" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="CPU" vmsize="1914021" vmpeak="2149482" vmrss="959363" vmhwm="1351006" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="GPU" vmsize="2119436" vmpeak="2416320" vmrss="1662554" vmhwm="2022462" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705806" vmpeak="780353" vmrss="52806" vmhwm="52806" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="700835" vmpeak="700835" vmrss="362949" vmhwm="362949" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791934" vmpeak="791934" vmrss="56794" vmhwm="56794" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674611" vmpeak="759808" vmrss="198120" vmhwm="198120" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046858" vmpeak="1178912" vmrss="308542" vmhwm="439483" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1226248" vmpeak="1247022" vmrss="889018" vmhwm="909454" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1140729" vmpeak="1225926" vmrss="372574" vmhwm="439826" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1199894" vmpeak="1285091" vmrss="724178" vmhwm="734505" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2134158" vmpeak="2836756" vmrss="1438309" vmhwm="2140715" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2781932" vmpeak="3912818" vmrss="2443178" vmhwm="3574105" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2220634" vmpeak="2836865" vmrss="1468797" vmhwm="2139722" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2790174" vmpeak="3834277" vmrss="2311826" vmhwm="3439888" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2189153" vmpeak="2918723" vmrss="1491048" vmhwm="2220868" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2869105" vmpeak="4001228" vmrss="2531100" vmhwm="3662869" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2366254" vmpeak="2918817" vmrss="1523605" vmhwm="2221388" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877716" vmpeak="3944751" vmrss="2400091" vmhwm="3551449" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3569482" vmpeak="4949084" vmrss="2797106" vmhwm="4176364" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4819713" vmpeak="6984764" vmrss="4481042" vmhwm="6645126" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3929790" vmpeak="4858536" vmrss="2814931" vmhwm="4176198" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4816962" vmpeak="6932770" vmrss="4337715" vmhwm="6538006" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="1172662" vmpeak="1401509" vmrss="491966" vmhwm="720564" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1345822" vmpeak="1585391" vmrss="1008384" vmhwm="1247916" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1442381" vmpeak="1442381" vmrss="510697" vmhwm="720267" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1348219" vmpeak="1513917" vmrss="870485" vmhwm="1120215" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="CPU" vmsize="1106159" vmpeak="1204460" vmrss="268408" vmhwm="366470" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="GPU" vmsize="1568190" vmpeak="1568190" vmrss="1230538" vmhwm="1230538" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="CPU" vmsize="1395617" vmpeak="1395617" vmrss="399692" vmhwm="399692" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="GPU" vmsize="1513621" vmpeak="1598818" vmrss="1035897" vmhwm="1035897" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="CPU" vmsize="1108187" vmpeak="1206488" vmrss="271648" vmhwm="369590" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="GPU" vmsize="2870816" vmpeak="2870816" vmrss="1290972" vmhwm="1290972" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="CPU" vmsize="1396408" vmpeak="1396408" vmrss="396172" vmhwm="396172" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="GPU" vmsize="2778490" vmpeak="2863686" vmrss="2307058" vmhwm="2307058" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="979706" vmpeak="1098692" vmrss="295682" vmhwm="414247" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1303499" vmpeak="1390069" vmrss="965224" vmhwm="1051580" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1247750" vmpeak="1247750" vmrss="307928" vmhwm="415266" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1160265" vmpeak="1245462" vmrss="682354" vmhwm="766100" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304610" vmhwm="430336" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1170265" vmpeak="1281675" vmrss="833180" vmhwm="944299" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1253189" vmpeak="1253189" vmrss="316373" vmhwm="429618" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1091214" vmpeak="1176411" vmrss="613095" vmhwm="724110" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304772" vmhwm="430414" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="GPU" vmsize="1150806" vmpeak="1261878" vmrss="813394" vmhwm="924123" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="CPU" vmsize="1253194" vmpeak="1253194" vmrss="315463" vmhwm="428974" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="GPU" vmsize="1090070" vmpeak="1175267" vmrss="612274" vmhwm="722924" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="705577" vmpeak="780457" vmrss="53320" vmhwm="53320" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="716476" vmpeak="716476" vmrss="378487" vmhwm="378487" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="972613" vmpeak="1057810" vmrss="57033" vmhwm="57033" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="672594" vmpeak="757790" vmrss="194183" vmhwm="194183" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="CPU" vmsize="1863586" vmpeak="2298270" vmrss="1166578" vmhwm="1601236" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="GPU" vmsize="3438385" vmpeak="3992487" vmrss="3100890" vmhwm="3654268" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="CPU" vmsize="2136893" vmpeak="2298270" vmrss="1177888" vmhwm="1601350" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="GPU" vmsize="2866156" vmpeak="3332056" vmrss="2390778" vmhwm="2939315" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="CPU" vmsize="1795970" vmpeak="2230654" vmrss="1095978" vmhwm="1530557" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="GPU" vmsize="3373229" vmpeak="3883687" vmrss="3035104" vmhwm="3545068" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="CPU" vmsize="2069298" vmpeak="2230675" vmrss="1108967" vmhwm="1530178" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="GPU" vmsize="2783367" vmpeak="3206626" vmrss="2308222" vmhwm="2813283" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="CPU" vmsize="1389767" vmpeak="1653657" vmrss="587459" vmhwm="851136" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="GPU" vmsize="1997091" vmpeak="1999374" vmrss="1659538" vmhwm="1661498" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="CPU" vmsize="1660250" vmpeak="1660250" vmrss="717350" vmhwm="850948" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="GPU" vmsize="1842703" vmpeak="1927900" vmrss="1363991" vmhwm="1363991" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="CPU" vmsize="783562" vmpeak="783562" vmrss="74089" vmhwm="74089" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="GPU" vmsize="976300" vmpeak="976300" vmrss="639132" vmhwm="639132" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="CPU" vmsize="1055204" vmpeak="1140401" vmrss="135018" vmhwm="135018" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="GPU" vmsize="895616" vmpeak="980813" vmrss="418631" vmhwm="418631" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903520" vmpeak="903520" vmrss="182405" vmhwm="182405" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1300780" vmpeak="1300780" vmrss="963144" vmhwm="963144" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1261171" vmpeak="1346368" vmrss="191354" vmhwm="191354" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1066088" vmpeak="1151285" vmrss="588608" vmhwm="588608" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992097" vmpeak="1004718" vmrss="276021" vmhwm="288532" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1673510" vmpeak="1686178" vmrss="1335256" vmhwm="1346415" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259304" vmpeak="1259304" vmrss="285667" vmhwm="288584" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1318803" vmpeak="1404000" vmrss="840652" vmhwm="840652" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="CPU" vmsize="742190" vmpeak="801429" vmrss="120036" vmhwm="120036" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="GPU" vmsize="917155" vmpeak="917155" vmrss="580470" vmhwm="580470" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="CPU" vmsize="828079" vmpeak="828079" vmrss="124950" vmhwm="124950" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="GPU" vmsize="798803" vmpeak="884000" vmrss="322223" vmhwm="322223" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="CPU" vmsize="1036542" vmpeak="1123340" vmrss="332675" vmhwm="418984" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="GPU" vmsize="1419095" vmpeak="1503018" vmrss="1081142" vmhwm="1164966" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="CPU" vmsize="1122513" vmpeak="1207710" vmrss="333564" vmhwm="417877" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="GPU" vmsize="1206654" vmpeak="1291851" vmrss="729799" vmhwm="812141" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2502557" vmpeak="2710479" vmrss="803394" vmhwm="1011098" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4844647" vmpeak="4844647" vmrss="4505820" vmhwm="4505820" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="927518" vmpeak="990735" vmrss="192327" vmhwm="255424" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1410156" vmpeak="1410156" vmrss="1071818" vmhwm="1071818" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1348308" vmpeak="1587736" vmrss="555162" vmhwm="794456" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2073328" vmpeak="2139914" vmrss="1735650" vmhwm="1801794" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="CPU" vmsize="1137926" vmpeak="1282252" vmrss="347172" vmhwm="491384" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="GPU" vmsize="1528581" vmpeak="1558133" vmrss="1191273" vmhwm="1220918" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="CPU" vmsize="1064445" vmpeak="1124276" vmrss="233131" vmhwm="292728" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="GPU" vmsize="1608666" vmpeak="1608666" vmrss="1270744" vmhwm="1270744" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="CPU" vmsize="1209941" vmpeak="1295138" vmrss="396422" vmhwm="396422" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="GPU" vmsize="1593238" vmpeak="1678435" vmrss="1137583" vmhwm="1257484" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713814" vmpeak="788028" vmrss="53034" vmhwm="53034" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="701729" vmpeak="701729" vmrss="363578" vmhwm="363578" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="799869" vmpeak="885066" vmrss="59810" vmhwm="59810" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="687694" vmpeak="772891" vmrss="209248" vmhwm="209248" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="706258" vmpeak="780140" vmrss="52884" vmhwm="52884" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="705052" vmpeak="705052" vmrss="367395" vmhwm="367395" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="973367" vmpeak="1058564" vmrss="56414" vmhwm="56414" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="677320" vmpeak="762517" vmrss="198619" vmhwm="198619" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437061" vmpeak="1624516" vmrss="755024" vmhwm="942141" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2478034" vmpeak="2597150" vmrss="2139680" vmhwm="2258219" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524120" vmpeak="1624521" vmrss="762559" vmhwm="940914" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2100274" vmpeak="2185471" vmrss="1622847" vmhwm="1739566" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="748534" vmpeak="809437" vmrss="143514" vmhwm="143514" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="943758" vmpeak="943758" vmrss="606392" vmhwm="606392" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1015783" vmpeak="1015783" vmrss="147118" vmhwm="147118" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="835073" vmpeak="920270" vmrss="357146" vmhwm="357146" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="834953" vmpeak="887541" vmrss="164626" vmhwm="217001" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1034649" vmpeak="1064835" vmrss="696592" vmhwm="726694" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="921081" vmpeak="1006278" vmrss="167502" vmhwm="215597" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="911310" vmpeak="996507" vmrss="433617" vmhwm="464682" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="971453" vmpeak="1081683" vmrss="305390" vmhwm="415204" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1332598" vmpeak="1413375" vmrss="995165" vmhwm="1075859" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1148685" vmpeak="1233882" vmrss="314220" vmhwm="414882" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1167634" vmpeak="1252830" vmrss="689416" vmhwm="769002" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189630" vmpeak="1393740" vmrss="511908" vmhwm="715540" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1867418" vmpeak="2007080" vmrss="1529990" vmhwm="1668929" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1367256" vmpeak="1452453" vmrss="523946" vmhwm="715577" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1611350" vmpeak="1696546" vmrss="1133615" vmhwm="1270427" />
- <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2715268" vmpeak="3061650" vmrss="776375" vmhwm="1122695" />
- <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4160156" vmpeak="4971210" vmrss="3823164" vmhwm="4634151" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="CPU" vmsize="701350" vmpeak="776562" vmrss="42281" vmhwm="42281" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="GPU" vmsize="717771" vmpeak="717771" vmrss="379501" vmhwm="379501" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="CPU" vmsize="786552" vmpeak="786552" vmrss="42406" vmhwm="42406" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="GPU" vmsize="656084" vmpeak="741280" vmrss="177543" vmhwm="177543" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="CPU" vmsize="705936" vmpeak="781149" vmrss="55619" vmhwm="55619" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="GPU" vmsize="724765" vmpeak="724765" vmrss="386458" vmhwm="386458" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="CPU" vmsize="791554" vmpeak="791554" vmrss="55582" vmhwm="55582" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="GPU" vmsize="670987" vmpeak="756184" vmrss="193029" vmhwm="193029" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="720673" vmpeak="720673" vmrss="99512" vmhwm="99512" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="771253" vmpeak="771253" vmrss="433087" vmhwm="433087" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="987828" vmpeak="1073025" vmrss="104005" vmhwm="104005" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="726986" vmpeak="812182" vmrss="248450" vmhwm="248450" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="726554" vmpeak="793447" vmrss="91452" vmhwm="91452" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="857027" vmpeak="857027" vmrss="519630" vmhwm="519630" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="812619" vmpeak="897816" vmrss="100895" vmhwm="100895" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="764800" vmpeak="849997" vmrss="287019" vmhwm="287019" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="CPU" vmsize="739960" vmpeak="739960" vmrss="134924" vmhwm="134924" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="GPU" vmsize="905439" vmpeak="905439" vmrss="567876" vmhwm="567876" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="CPU" vmsize="825988" vmpeak="891722" vmrss="144684" vmhwm="144684" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="GPU" vmsize="821251" vmpeak="906448" vmrss="343085" vmhwm="343085" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="CPU" vmsize="1026407" vmpeak="1026407" vmrss="351535" vmhwm="351535" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="GPU" vmsize="1104485" vmpeak="1149496" vmrss="766740" vmhwm="811642" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="CPU" vmsize="1209280" vmpeak="1209280" vmrss="362325" vmhwm="362325" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="GPU" vmsize="1105275" vmpeak="1190472" vmrss="627822" vmhwm="671450" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="CPU" vmsize="988072" vmpeak="1114146" vmrss="304798" vmhwm="430279" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="GPU" vmsize="1171383" vmpeak="1282325" vmrss="833705" vmhwm="944476" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="CPU" vmsize="1164982" vmpeak="1250178" vmrss="319394" vmhwm="429904" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="GPU" vmsize="1090481" vmpeak="1115056" vmrss="613485" vmhwm="722176" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1185163" vmpeak="1406329" vmrss="511669" vmhwm="732674" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1646897" vmpeak="1857653" vmrss="1308538" vmhwm="1518940" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1361906" vmpeak="1447102" vmrss="515138" vmhwm="731073" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1486612" vmpeak="1612171" vmrss="1008602" vmhwm="1218973" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1361328" vmpeak="1659262" vmrss="685287" vmhwm="983091" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2053204" vmpeak="2340951" vmrss="1714788" vmhwm="2002072" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628504" vmpeak="1713701" vmrss="690892" vmhwm="983257" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1817290" vmpeak="2019841" vmrss="1338792" vmhwm="1625405" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="980148" vmpeak="1106211" vmrss="304340" vmhwm="430242" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1177410" vmpeak="1291040" vmrss="839217" vmhwm="952868" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1060997" vmpeak="1146194" vmrss="308906" vmhwm="429811" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1094189" vmpeak="1123038" vmrss="616548" vmhwm="730298" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1217086" vmpeak="1438262" vmrss="515611" vmhwm="736502" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1721532" vmpeak="1922648" vmrss="1383304" vmhwm="1584195" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1394296" vmpeak="1479493" vmrss="530197" vmhwm="735883" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1533625" vmpeak="1649492" vmrss="1055813" vmhwm="1256236" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1664005" vmpeak="1929070" vmrss="791611" vmhwm="988280" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2054062" vmpeak="2324472" vmrss="1715776" vmhwm="1985344" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1750642" vmpeak="1750642" vmrss="806811" vmhwm="988041" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1905020" vmpeak="2088814" vmrss="1426682" vmhwm="1694347" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="CPU" vmsize="994541" vmpeak="1120615" vmrss="307034" vmhwm="432806" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="GPU" vmsize="1212042" vmpeak="1312194" vmrss="874780" vmhwm="974438" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="CPU" vmsize="1081334" vmpeak="1166531" vmrss="322436" vmhwm="432702" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="GPU" vmsize="1116720" vmpeak="1132315" vmrss="638097" vmhwm="738348" />
- <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1467762" vmpeak="1671108" vmrss="691412" vmhwm="894509" />
- <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2625381" vmpeak="2732168" vmrss="2288915" vmhwm="2392494" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="713590" vmpeak="788138" vmrss="53216" vmhwm="53216" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724427" vmpeak="724427" vmrss="386354" vmhwm="386354" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="799604" vmpeak="799604" vmrss="59534" vmhwm="59534" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="685677" vmpeak="770874" vmrss="206845" vmhwm="206845" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="CPU" vmsize="832010" vmpeak="832010" vmrss="144367" vmhwm="144367" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="GPU" vmsize="920249" vmpeak="920249" vmrss="582769" vmhwm="582769" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="CPU" vmsize="1009200" vmpeak="1094397" vmrss="156052" vmhwm="156052" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="GPU" vmsize="851666" vmpeak="936863" vmrss="374660" vmhwm="374660" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="CPU" vmsize="1357855" vmpeak="1537842" vmrss="428038" vmhwm="602841" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="GPU" vmsize="1748255" vmpeak="1748255" vmrss="1410474" vmhwm="1410474" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="CPU" vmsize="1539933" vmpeak="1625130" vmrss="506157" vmhwm="602326" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="GPU" vmsize="1597762" vmpeak="1597762" vmrss="1125956" vmhwm="1125956" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="CPU" vmsize="1508566" vmpeak="1688554" vmrss="427086" vmhwm="602414" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="GPU" vmsize="1694071" vmpeak="1694071" vmrss="1356300" vmhwm="1356300" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="CPU" vmsize="1418346" vmpeak="1507495" vmrss="498206" vmhwm="602238" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="GPU" vmsize="1533370" vmpeak="1618567" vmrss="1062006" vmhwm="1062006" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="912147" vmpeak="990698" vmrss="224068" vmhwm="302484" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1144707" vmpeak="1222395" vmrss="807570" vmhwm="885076" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="CPU" vmsize="998842" vmpeak="1048663" vmrss="239059" vmhwm="302291" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="GPU" vmsize="1054336" vmpeak="1139533" vmrss="577106" vmhwm="651913" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="CPU" vmsize="1046905" vmpeak="1206301" vmrss="351400" vmhwm="510603" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="GPU" vmsize="1199005" vmpeak="1333363" vmrss="861400" vmhwm="995815" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="CPU" vmsize="1132003" vmpeak="1217200" vmrss="380998" vmhwm="509615" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="GPU" vmsize="1174336" vmpeak="1259533" vmrss="696300" vmhwm="857849" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133768" vmpeak="2836366" vmrss="1437966" vmhwm="2140403" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2803710" vmpeak="3934762" vmrss="2464961" vmhwm="3596054" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2400741" vmpeak="2836230" vmrss="1468438" vmhwm="2139410" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2793221" vmpeak="3855737" vmrss="2313766" vmhwm="3461135" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188924" vmpeak="2918494" vmrss="1491630" vmhwm="2221008" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2899624" vmpeak="4031731" vmrss="2561410" vmhwm="3693086" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2274792" vmpeak="2918401" vmrss="1523438" vmhwm="2221039" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877160" vmpeak="3966222" vmrss="2398546" vmhwm="3572186" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1252357" vmpeak="1511010" vmrss="552931" vmhwm="811361" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1481464" vmpeak="1701512" vmrss="1144072" vmhwm="1363939" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1340471" vmpeak="1510438" vmrss="585192" vmhwm="810186" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1465339" vmpeak="1601189" vmrss="987604" vmhwm="1207902" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="CPU" vmsize="872019" vmpeak="952447" vmrss="192904" vmhwm="272953" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="GPU" vmsize="876340" vmpeak="970054" vmrss="538460" vmhwm="632299" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="CPU" vmsize="959992" vmpeak="1045189" vmrss="207662" vmhwm="273093" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="GPU" vmsize="883292" vmpeak="968489" vmrss="405891" vmhwm="476907" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="CPU" vmsize="1248988" vmpeak="1505738" vmrss="549031" vmhwm="805745" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="GPU" vmsize="1459816" vmpeak="1681716" vmrss="1121952" vmhwm="1343638" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="CPU" vmsize="1337055" vmpeak="1506221" vmrss="582212" vmhwm="806447" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="GPU" vmsize="1456322" vmpeak="1589104" vmrss="977688" vmhwm="1194798" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388498" vmpeak="1700405" vmrss="680981" vmhwm="992706" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1904952" vmpeak="2102276" vmrss="1567898" vmhwm="1764921" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1486066" vmpeak="1705636" vmrss="724443" vmhwm="992409" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1809121" vmpeak="1916995" vmrss="1331512" vmhwm="1523137" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="CPU" vmsize="803400" vmpeak="848244" vmrss="123765" vmhwm="168360" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="GPU" vmsize="795683" vmpeak="825796" vmrss="458718" vmhwm="488498" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="CPU" vmsize="892273" vmpeak="977470" vmrss="139048" vmhwm="168292" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="GPU" vmsize="789438" vmpeak="874634" vmrss="312400" vmhwm="338832" />
+
</models>
</attributes>
\ No newline at end of file
<value>GPU</value>
</devices>
<models>
- <value>caffe/FP32/alexnet/alexnet.xml</value>
- <value>caffe/FP32/caffenet/caffenet.xml</value>
- <value>caffe/FP32/densenet_121/densenet_121.xml</value>
- <value>caffe/FP32/densenet_161/densenet_161.xml</value>
- <value>caffe/FP32/densenet_169/densenet_169.xml</value>
- <value>caffe/FP32/densenet_201/densenet_201.xml</value>
- <value>caffe/FP32/dpn_92/dpn_92.xml</value>
- <value>caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
- <value>caffe/FP32/inception_v1/inception_v1.xml</value>
- <value>caffe/FP32/inception_v2/inception_v2.xml</value>
- <value>caffe/FP32/inception_v3/inception_v3.xml</value>
- <value>caffe/FP32/inception_v4/inception_v4.xml</value>
- <value>caffe/FP32/lenet/lenet.xml</value>
- <value>caffe/FP32/mobilenet/mobilenet.xml</value>
- <value>caffe/FP32/mobilenet_v2/mobilenet_v2.xml</value>
- <value>caffe/FP32/resnet_18/resnet_18.xml</value>
- <value>caffe/FP32/resnet_v1_50/resnet_v1_50.xml</value>
- <value>caffe/FP32/resnet_v1_101/resnet_v1_101.xml</value>
- <value>caffe/FP32/resnet_v1_152/resnet_v1_152.xml</value>
- <value>caffe/FP32/resnet_v1_269/resnet_v1_269.xml</value>
- <value>caffe/FP32/se_resnext_50/se_resnext_50.xml</value>
- <value>caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml</value>
- <value>caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
- <value>caffe/FP32/ssd_googlenet/ssd_googlenet.xml</value>
- <value>caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml</value>
- <value>caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml</value>
- <value>caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
- <value>caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml</value>
- <value>caffe/FP32/vgg16/vgg16.xml</value>
- <value>caffe/FP32/vgg19/vgg19.xml</value>
- <value>caffe/FP32/wrn_50_2/wrn_50_2.xml</value>
- <value>caffe/FP32/yolo_v1_full/yolo_v1_full.xml</value>
- <value>caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
- <value>caffe/FP32/yolo_v2/yolo_v2.xml</value>
- <value>caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml</value>
- <value>caffe/FP32/yolo_v3/yolo_v3.xml</value>
- <value>caffe/FP32/dilation/dilation.xml</value>
- <value>caffe/FP32/dssd/dssd.xml</value>
- <value>caffe/FP32/fcn8/fcn8.xml</value>
- <value>caffe/FP32/fcn32/fcn32.xml</value>
- <value>caffe/FP32/fcn_alexnet/fcn_alexnet.xml</value>
- <value>caffe/FP32/mtcnn_p/mtcnn_p.xml</value>
- <value>caffe/FP32/mtcnn_r/mtcnn_r.xml</value>
- <value>caffe/FP32/mtcnn_o/mtcnn_o.xml</value>
- <value>caffe/FP32/openpose_face/openpose_face.xml</value>
- <value>caffe/FP32/openpose_hand/openpose_hand.xml</value>
- <value>caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml</value>
- <value>caffe/FP32/places205_alexnet/places205_alexnet.xml</value>
- <value>caffe/FP32/places205_googlenet/places205_googlenet.xml</value>
- <value>caffe/FP32/se_bn_inception/se_bn_inception.xml</value>
- <value>caffe/FP32/vnect/vnect.xml</value>
- <value>tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml</value>
- <value>tf/1.14.0/FP32/bert_xnli/bert_xnli.xml</value>
- <value>tf/1.14.0/FP32/cmu/cmu.xml</value>
- <value>tf/1.14.0/FP32/densenet_121/densenet_121.xml</value>
- <value>tf/1.14.0/FP32/densenet_169/densenet_169.xml</value>
- <value>tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml</value>
- <value>tf/1.14.0/FP32/east/east.xml</value>
- <value>tf/1.14.0/FP32/facenet/facenet.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml</value>
- <value>tf/1.14.0/FP32/gnmt/gnmt.xml</value>
- <value>tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml</value>
- <value>tf/1.14.0/FP32/inception_v1/inception_v1.xml</value>
- <value>tf/1.14.0/FP32/inception_v2/inception_v2.xml</value>
- <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
- <value>tf/1.14.0/FP32/inception_v4/inception_v4.xml</value>
- <value>tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
- <value>tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml</value>
- <value>tf/1.14.0/FP32/ncf/ncf.xml</value>
- <value>tf/1.14.0/FP32/nasnet-a_large/nasnet-a_large.xml</value>
- <value>tf/1.14.0/FP32/nasnet-a_mobile/nasnet-a_mobile.xml</value>
- <value>tf/1.14.0/FP32/pnasnet-5_large/pnasnet-5_large.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml</value>
- <value>tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml</value>
- <value>tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml</value>
- <value>tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml</value>
- <value>tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml</value>
- <value>tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml</value>
- <value>tf/1.14.0/FP32/unet2d/unet2d.xml</value>
- <value>tf/1.14.0/FP32/vgg16/vgg16.xml</value>
- <value>tf/1.14.0/FP32/vgg19/vgg19.xml</value>
- <value>tf/1.14.0/FP32/yolo_v2/yolo_v2.xml</value>
- <value>tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml</value>
- <value>tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml</value>
- <value>tf/1.14.0/FP32/yolo_v3/yolo_v3.xml</value>
- <value>tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml</value>
- <value>tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml</value>
- <value>tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml</value>
- <value>tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml</value>
- <value>mxnet/FP32/caffenet/caffenet.xml</value>
- <value>mxnet/FP32/densenet_121/densenet_121.xml</value>
- <value>mxnet/FP32/densenet_161/densenet_161.xml</value>
- <value>mxnet/FP32/densenet_169/densenet_169.xml</value>
- <value>mxnet/FP32/densenet_201/densenet_201.xml</value>
- <value>mxnet/FP32/inception_v3/inception_v3.xml</value>
- <value>mxnet/FP32/inception_v4/inception_v4.xml</value>
- <value>mxnet/FP32/mobilenet/mobilenet.xml</value>
- <value>mxnet/FP32/mobilenet_v2/mobilenet_v2.xml</value>
- <value>mxnet/FP32/resnet_v1_101/resnet_v1_101.xml</value>
- <value>mxnet/FP32/resnet_v1_152/resnet_v1_152.xml</value>
- <value>mxnet/FP32/resnet_v2_101/resnet_v2_101.xml</value>
- <value>mxnet/FP32/resnet_v2_152/resnet_v2_152.xml</value>
- <value>mxnet/FP32/resnext_101/resnext_101.xml</value>
- <value>mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
- <value>mxnet/FP32/ssd_inception_v3_512/ssd_inception_v3_512.xml</value>
- <value>mxnet/FP32/ssd_mobilenet_512/ssd_mobilenet_512.xml</value>
- <value>mxnet/FP32/ssd_resnet50_512/ssd_resnet50_512.xml</value>
- <value>mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
- <value>mxnet/FP32/vgg16/vgg16.xml</value>
- <value>mxnet/FP32/vgg19/vgg19.xml</value>
- <value>mxnet/FP32/dpn_92/dpn_92.xml</value>
- <value>mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml</value>
- <value>mxnet/FP32/full_imagenet_network/full_imagenet_network.xml</value>
- <value>mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
- <value>mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml</value>
- <value>mxnet/FP32/location_net/location_net.xml</value>
- <value>mxnet/FP32/lresnet100e/lresnet100e.xml</value>
- <value>mxnet/FP32/mtcnn_p/mtcnn_p.xml</value>
- <value>mxnet/FP32/mtcnn_r/mtcnn_r.xml</value>
- <value>mxnet/FP32/mtcnn_o/mtcnn_o.xml</value>
- <value>mxnet/FP32/nin/nin.xml</value>
- <value>mxnet/FP32/nst_vgg19/nst_vgg19.xml</value>
- <value>mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml</value>
- <value>mxnet/FP32/yolo_v1_full/yolo_v1_full.xml</value>
- <value>mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
- <value>onnx/FP32/ssd_resnet34/ssd_resnet34.xml</value>
- <value>onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml</value>
- <value>onnx/FP32/retina_net/retina_net.xml</value>
- <value>pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml</value>
- <value>pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml</value>
- <value>pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml</value>
- <value>pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml</value>
+ <value>public/mobilenet-v2-1.4-224/FP32/mobilenet-v2-1.4-224.xml</value>
+ <value>public/resnet-101/FP32/resnet-101.xml</value>
+ <value>public/brain-tumor-segmentation-0001/FP32/brain-tumor-segmentation-0001.xml</value>
+ <value>public/octave-resnet-101-0.125/FP32/octave-resnet-101-0.125.xml</value>
+ <value>public/faster_rcnn_inception_resnet_v2_atrous_coco/FP32/faster_rcnn_inception_resnet_v2_atrous_coco.xml</value>
+ <value>public/efficientnet-b7_auto_aug/FP32/efficientnet-b7_auto_aug.xml</value>
+ <value>public/yolo-v2-tf/FP32/yolo-v2-tf.xml</value>
+ <value>public/mobilenet-v2-1.0-224/FP32/mobilenet-v2-1.0-224.xml</value>
+ <value>public/colorization-v2-norebal/FP32/colorization-v2-norebal.xml</value>
+ <value>public/se-inception/FP32/se-inception.xml</value>
+ <value>public/efficientnet-b0/FP32/efficientnet-b0.xml</value>
+ <value>public/mobilenet-v1-1.0-224-tf/FP32/mobilenet-v1-1.0-224-tf.xml</value>
+ <value>public/mask_rcnn_resnet101_atrous_coco/FP32/mask_rcnn_resnet101_atrous_coco.xml</value>
+ <value>public/ssd_mobilenet_v1_coco/FP32/ssd_mobilenet_v1_coco.xml</value>
+ <value>public/se-resnet-152/FP32/se-resnet-152.xml</value>
+ <value>public/octave-resnext-50-0.25/FP32/octave-resnext-50-0.25.xml</value>
+ <value>public/googlenet-v3/FP32/googlenet-v3.xml</value>
+ <value>public/ssd_mobilenet_v2_coco/FP32/ssd_mobilenet_v2_coco.xml</value>
+ <value>public/alexnet/FP32/alexnet.xml</value>
+ <value>public/license-plate-recognition-barrier-0007/FP32/license-plate-recognition-barrier-0007.xml</value>
+ <value>public/mobilenet-v1-0.50-224/FP32/mobilenet-v1-0.50-224.xml</value>
+ <value>public/ssd_mobilenet_v1_fpn_coco/FP32/ssd_mobilenet_v1_fpn_coco.xml</value>
+ <value>public/vgg16/FP32/vgg16.xml</value>
+ <value>public/face-recognition-resnet34-arcface/FP32/face-recognition-resnet34-arcface.xml</value>
+ <value>public/gmcnn-places2-tf/FP32/gmcnn-places2-tf.xml</value>
+ <value>public/mobilenet-v1-1.0-224/FP32/mobilenet-v1-1.0-224.xml</value>
+ <value>public/se-resnet-101/FP32/se-resnet-101.xml</value>
+ <value>public/face-detection-retail-0044/FP32/face-detection-retail-0044.xml</value>
+ <value>public/face-recognition-mobilefacenet-arcface/FP32/face-recognition-mobilefacenet-arcface.xml</value>
+ <value>public/vehicle-license-plate-detection-barrier-0123/FP32/vehicle-license-plate-detection-barrier-0123.xml</value>
+ <value>public/densenet-161/FP32/densenet-161.xml</value>
+ <value>public/mask_rcnn_inception_resnet_v2_atrous_coco/FP32/mask_rcnn_inception_resnet_v2_atrous_coco.xml</value>
+ <value>public/octave-resnext-101-0.25/FP32/octave-resnext-101-0.25.xml</value>
+ <value>public/face-recognition-resnet50-arcface/FP32/face-recognition-resnet50-arcface.xml</value>
+ <value>public/densenet-161-tf/FP32/densenet-161-tf.xml</value>
+ <value>public/octave-resnet-200-0.125/FP32/octave-resnet-200-0.125.xml</value>
+ <value>public/mtcnn-p/FP32/mtcnn-p.xml</value>
+ <value>public/se-resnext-101/FP32/se-resnext-101.xml</value>
+ <value>public/efficientnet-b5/FP32/efficientnet-b5.xml</value>
+ <value>public/densenet-169-tf/FP32/densenet-169-tf.xml</value>
+ <value>public/densenet-201/FP32/densenet-201.xml</value>
+ <value>public/resnet-50-tf/FP32/resnet-50-tf.xml</value>
+ <value>public/squeezenet1.1/FP32/squeezenet1.1.xml</value>
+ <value>public/squeezenet1.0/FP32/squeezenet1.0.xml</value>
+ <value>public/octave-resnet-26-0.25/FP32/octave-resnet-26-0.25.xml</value>
+ <value>public/googlenet-v4-tf/FP32/googlenet-v4-tf.xml</value>
+ <value>public/ssd300/FP32/ssd300.xml</value>
+ <value>public/rfcn-resnet101-coco-tf/FP32/rfcn-resnet101-coco-tf.xml</value>
+ <value>public/vgg19/FP32/vgg19.xml</value>
+ <value>public/ctdet_coco_dlav0_384/FP32/ctdet_coco_dlav0_384.xml</value>
+ <value>public/efficientnet-b0_auto_aug/FP32/efficientnet-b0_auto_aug.xml</value>
+ <value>public/googlenet-v1/FP32/googlenet-v1.xml</value>
+ <value>public/faster_rcnn_inception_v2_coco/FP32/faster_rcnn_inception_v2_coco.xml</value>
+ <value>public/mask_rcnn_inception_v2_coco/FP32/mask_rcnn_inception_v2_coco.xml</value>
+ <value>public/inception-resnet-v2-tf/FP32/inception-resnet-v2-tf.xml</value>
+ <value>public/deeplabv3/FP32/deeplabv3.xml</value>
+ <value>public/yolo-v3-tf/FP32/yolo-v3-tf.xml</value>
+ <value>public/resnet-152/FP32/resnet-152.xml</value>
+ <value>public/mtcnn-o/FP32/mtcnn-o.xml</value>
+ <value>public/octave-se-resnet-50-0.125/FP32/octave-se-resnet-50-0.125.xml</value>
+ <value>public/yolo-v1-tiny-tf/FP32/yolo-v1-tiny-tf.xml</value>
+ <value>public/resnet-50/FP32/resnet-50.xml</value>
+ <value>public/googlenet-v1-tf/FP32/googlenet-v1-tf.xml</value>
+ <value>public/yolo-v2-tiny-tf/FP32/yolo-v2-tiny-tf.xml</value>
+ <value>public/ssd512/FP32/ssd512.xml</value>
+ <value>public/densenet-169/FP32/densenet-169.xml</value>
+ <value>public/brain-tumor-segmentation-0002/FP32/brain-tumor-segmentation-0002.xml</value>
+ <value>public/Sphereface/FP32/Sphereface.xml</value>
+ <value>public/googlenet-v2/FP32/googlenet-v2.xml</value>
+ <value>public/face-recognition-resnet100-arcface/FP32/face-recognition-resnet100-arcface.xml</value>
+ <value>public/mobilenet-v1-0.25-128/FP32/mobilenet-v1-0.25-128.xml</value>
+ <value>public/ctdet_coco_dlav0_512/FP32/ctdet_coco_dlav0_512.xml</value>
+ <value>public/facenet-20180408-102900/FP32/facenet-20180408-102900.xml</value>
+ <value>public/ctpn/FP32/ctpn.xml</value>
+ <value>public/ssdlite_mobilenet_v2/FP32/ssdlite_mobilenet_v2.xml</value>
+ <value>public/i3d-rgb-tf/FP32/i3d-rgb-tf.xml</value>
+ <value>public/mobilenet-v2/FP32/mobilenet-v2.xml</value>
+ <value>public/mobilenet-ssd/FP32/mobilenet-ssd.xml</value>
+ <value>public/se-resnext-50/FP32/se-resnext-50.xml</value>
+ <value>public/caffenet/FP32/caffenet.xml</value>
+ <value>public/mtcnn-r/FP32/mtcnn-r.xml</value>
+ <value>public/faster_rcnn_resnet50_coco/FP32/faster_rcnn_resnet50_coco.xml</value>
+ <value>public/se-resnet-50/FP32/se-resnet-50.xml</value>
+ <value>public/mask_rcnn_resnet50_atrous_coco/FP32/mask_rcnn_resnet50_atrous_coco.xml</value>
+ <value>public/octave-resnet-50-0.125/FP32/octave-resnet-50-0.125.xml</value>
+ <value>public/densenet-121-tf/FP32/densenet-121-tf.xml</value>
+ <value>public/mobilenet-v1-0.50-160/FP32/mobilenet-v1-0.50-160.xml</value>
+ <value>public/densenet-121/FP32/densenet-121.xml</value>
+ <value>public/faster_rcnn_resnet101_coco/FP32/faster_rcnn_resnet101_coco.xml</value>
+ <value>public/octave-densenet-121-0.125/FP32/octave-densenet-121-0.125.xml</value>
+ <value>public/colorization-v2/FP32/colorization-v2.xml</value>
+ <value>public/densenet-121-caffe2/FP32/densenet-121-caffe2.xml</value>
+ <value>public/efficientnet-b0-pytorch/FP32/efficientnet-b0-pytorch.xml</value>
+ <value>public/efficientnet-b5-pytorch/FP32/efficientnet-b5-pytorch.xml</value>
+ <value>public/efficientnet-b7-pytorch/FP32/efficientnet-b7-pytorch.xml</value>
+ <value>public/googlenet-v3-pytorch/FP32/googlenet-v3-pytorch.xml</value>
+ <value>public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.xml</value>
+ <value>public/midasnet/FP32/midasnet.xml</value>
+ <value>public/mobilenet-v2-pytorch/FP32/mobilenet-v2-pytorch.xml</value>
+ <value>public/resnet-18-pytorch/FP32/resnet-18-pytorch.xml</value>
+ <value>public/resnet-50-caffe2/FP32/resnet-50-caffe2.xml</value>
+ <value>public/resnet-50-pytorch/FP32/resnet-50-pytorch.xml</value>
+ <value>public/single-human-pose-estimation-0001/FP32/single-human-pose-estimation-0001.xml</value>
+ <value>public/squeezenet1.1-caffe2/FP32/squeezenet1.1-caffe2.xml</value>
+ <value>public/vgg19-caffe2/FP32/vgg19-caffe2.xml</value>
+ <value>intel/facial-landmarks-35-adas-0002/FP32/facial-landmarks-35-adas-0002.xml</value>
+ <value>intel/vehicle-attributes-recognition-barrier-0039/FP32/vehicle-attributes-recognition-barrier-0039.xml</value>
+ <value>intel/person-detection-action-recognition-0006/FP32/person-detection-action-recognition-0006.xml</value>
+ <value>intel/asl-recognition-0004/FP32/asl-recognition-0004.xml</value>
+ <value>intel/yolo-v2-tiny-ava-sparse-30-0001/FP32/yolo-v2-tiny-ava-sparse-30-0001.xml</value>
+ <value>intel/text-detection-0004/FP32/text-detection-0004.xml</value>
+ <value>intel/person-vehicle-bike-detection-crossroad-1016/FP32/person-vehicle-bike-detection-crossroad-1016.xml</value>
+ <value>intel/text-spotting-0002-detector/FP32/text-spotting-0002-detector.xml</value>
+ <value>intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013.xml</value>
+ <value>intel/vehicle-detection-adas-0002/FP32/vehicle-detection-adas-0002.xml</value>
+ <value>intel/image-retrieval-0001/FP32/image-retrieval-0001.xml</value>
+ <value>intel/person-detection-retail-0002/FP32/person-detection-retail-0002.xml</value>
+ <value>intel/person-attributes-recognition-crossroad-0230/FP32/person-attributes-recognition-crossroad-0230.xml</value>
+ <value>intel/face-detection-0100/FP32/face-detection-0100.xml</value>
+ <value>intel/face-detection-0102/FP32/face-detection-0102.xml</value>
+ <value>intel/person-reidentification-retail-0031/FP32/person-reidentification-retail-0031.xml</value>
+ <value>intel/person-reidentification-retail-0300/FP32/person-reidentification-retail-0300.xml</value>
+ <value>intel/instance-segmentation-security-0010/FP32/instance-segmentation-security-0010.xml</value>
+ <value>intel/instance-segmentation-security-0083/FP32/instance-segmentation-security-0083.xml</value>
+ <value>intel/face-detection-0105/FP32/face-detection-0105.xml</value>
+ <value>intel/face-detection-0104/FP32/face-detection-0104.xml</value>
+ <value>intel/icnet-camvid-ava-sparse-30-0001/FP32/icnet-camvid-ava-sparse-30-0001.xml</value>
+ <value>intel/action-recognition-0001-decoder/FP32/action-recognition-0001-decoder.xml</value>
+ <value>intel/face-detection-0106/FP32/face-detection-0106.xml</value>
+ <value>intel/person-detection-action-recognition-teacher-0002/FP32/person-detection-action-recognition-teacher-0002.xml</value>
+ <value>intel/person-vehicle-bike-detection-crossroad-0078/FP32/person-vehicle-bike-detection-crossroad-0078.xml</value>
+ <value>intel/icnet-camvid-ava-sparse-60-0001/FP32/icnet-camvid-ava-sparse-60-0001.xml</value>
+ <value>intel/face-detection-adas-0001/FP32/face-detection-adas-0001.xml</value>
+ <value>intel/unet-camvid-onnx-0001/FP32/unet-camvid-onnx-0001.xml</value>
+ <value>intel/human-pose-estimation-0001/FP32/human-pose-estimation-0001.xml</value>
+ <value>intel/faster-rcnn-resnet101-coco-sparse-60-0001/FP32/faster-rcnn-resnet101-coco-sparse-60-0001.xml</value>
+ <value>intel/action-recognition-0001-encoder/FP32/action-recognition-0001-encoder.xml</value>
+ <value>intel/yolo-v2-ava-sparse-35-0001/FP32/yolo-v2-ava-sparse-35-0001.xml</value>
+ <value>intel/yolo-v2-ava-sparse-70-0001/FP32/yolo-v2-ava-sparse-70-0001.xml</value>
+ <value>intel/person-reidentification-retail-0248/FP32/person-reidentification-retail-0248.xml</value>
+ <value>intel/person-detection-raisinghand-recognition-0001/FP32/person-detection-raisinghand-recognition-0001.xml</value>
+ <value>intel/person-detection-asl-0001/FP32/person-detection-asl-0001.xml</value>
+ <value>intel/emotions-recognition-retail-0003/FP32/emotions-recognition-retail-0003.xml</value>
+ <value>intel/yolo-v2-tiny-ava-0001/FP32/yolo-v2-tiny-ava-0001.xml</value>
+ <value>intel/license-plate-recognition-barrier-0001/FP32/license-plate-recognition-barrier-0001.xml</value>
+ <value>intel/person-detection-retail-0013/FP32/person-detection-retail-0013.xml</value>
+ <value>intel/instance-segmentation-security-0050/FP32/instance-segmentation-security-0050.xml</value>
+ <value>intel/single-image-super-resolution-1032/FP32/single-image-super-resolution-1032.xml</value>
+ <value>intel/landmarks-regression-retail-0009/FP32/landmarks-regression-retail-0009.xml</value>
+ <value>intel/driver-action-recognition-adas-0002-decoder/FP32/driver-action-recognition-adas-0002-decoder.xml</value>
+ <value>intel/person-reidentification-retail-0249/FP32/person-reidentification-retail-0249.xml</value>
+ <value>intel/text-spotting-0002-recognizer-decoder/FP32/text-spotting-0002-recognizer-decoder.xml</value>
+ <value>intel/yolo-v2-ava-0001/FP32/yolo-v2-ava-0001.xml</value>
+ <value>intel/person-detection-action-recognition-0005/FP32/person-detection-action-recognition-0005.xml</value>
+ <value>intel/text-recognition-0012/FP32/text-recognition-0012.xml</value>
+ <value>intel/face-detection-retail-0004/FP32/face-detection-retail-0004.xml</value>
+ <value>intel/product-detection-0001/FP32/product-detection-0001.xml</value>
+ <value>intel/yolo-v2-tiny-ava-sparse-60-0001/FP32/yolo-v2-tiny-ava-sparse-60-0001.xml</value>
+ <value>intel/face-reidentification-retail-0095/FP32/face-reidentification-retail-0095.xml</value>
+ <value>intel/road-segmentation-adas-0001/FP32/road-segmentation-adas-0001.xml</value>
+ <value>intel/single-image-super-resolution-1033/FP32/single-image-super-resolution-1033.xml</value>
+ <value>intel/face-detection-retail-0005/FP32/face-detection-retail-0005.xml</value>
+ <value>intel/pedestrian-and-vehicle-detector-adas-0001/FP32/pedestrian-and-vehicle-detector-adas-0001.xml</value>
+ <value>intel/handwritten-japanese-recognition-0001/FP32/handwritten-japanese-recognition-0001.xml</value>
+ <value>intel/semantic-segmentation-adas-0001/FP32/semantic-segmentation-adas-0001.xml</value>
+ <value>intel/pedestrian-detection-adas-0002/FP32/pedestrian-detection-adas-0002.xml</value>
+ <value>intel/driver-action-recognition-adas-0002-encoder/FP32/driver-action-recognition-adas-0002-encoder.xml</value>
+ <value>intel/text-detection-0003/FP32/text-detection-0003.xml</value>
+ <value>intel/text-spotting-0002-recognizer-encoder/FP32/text-spotting-0002-recognizer-encoder.xml</value>
+ <value>intel/handwritten-score-recognition-0003/FP32/handwritten-score-recognition-0003.xml</value>
+ <value>intel/icnet-camvid-ava-0001/FP32/icnet-camvid-ava-0001.xml</value>
+ <value>intel/text-image-super-resolution-0001/FP32/text-image-super-resolution-0001.xml</value>
+ <value>intel/gaze-estimation-adas-0002/FP32/gaze-estimation-adas-0002.xml</value>
+ <value>intel/head-pose-estimation-adas-0001/FP32/head-pose-estimation-adas-0001.xml</value>
+ <value>intel/vehicle-license-plate-detection-barrier-0106/FP32/vehicle-license-plate-detection-barrier-0106.xml</value>
+ <value>intel/instance-segmentation-security-1025/FP32/instance-segmentation-security-1025.xml</value>
</models>
</attributes>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>${STRESS_IRS_PATH}</value>
+ </irs_path>
+</attributes>
<value>GPU</value>
</devices>
<models>
- <value>caffe/FP32/alexnet/alexnet.xml</value>
- <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+ <value>public/alexnet/FP32/alexnet.xml</value>
+ <value>public/mobilenet-ssd/FP32/mobilenet-ssd.xml</value>
+ <value>public/mtcnn-r/FP32/mtcnn-r.xml</value>
</models>
</attributes>
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>${STRESS_IRS_PATH}</value>
+ </irs_path>
+</attributes>
<value>GPU</value>
</devices>
<models>
- <value>caffe/FP32/alexnet/alexnet.xml</value>
- <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+ <value>public/alexnet/FP32/alexnet.xml</value>
+ <value>public/mobilenet-ssd/FP32/mobilenet-ssd.xml</value>
+ <value>public/mtcnn-r/FP32/mtcnn-r.xml</value>
</models>
</attributes>
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>${STRESS_IRS_PATH}</value>
+ </irs_path>
+</attributes>
<value>GPU</value>
</devices>
<models>
- <value>caffe/FP32/alexnet/alexnet.xml</value>
- <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+ <value>public/alexnet/FP32/alexnet.xml</value>
+ <value>public/mobilenet-ssd/FP32/mobilenet-ssd.xml</value>
+ <value>public/mtcnn-r/FP32/mtcnn-r.xml</value>
</models>
</attributes>
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>${STRESS_IRS_PATH}</value>
+ </irs_path>
+</attributes>
<value>GPU</value>
</devices>
<models>
- <!--<value>caffe/FP32/alexnet/alexnet.xml</value>-->
- <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+ <value>public/alexnet/FP32/alexnet.xml</value>
+ <value>public/mobilenet-ssd/FP32/mobilenet-ssd.xml</value>
+ <!--<value>public/mtcnn-r/FP32/mtcnn-r.xml</value>-->
</models>
</attributes>
# Name of virtualenv created by stress_tests/scripts/get_testdata.py
-./.stress_venv
\ No newline at end of file
+./.stress_venv
+
+# File with models names generated by stress_tests/scripts/get_testdata.py to use in OMZ downloader.py
+./scripts/models_list.txt
``` bash
gtest-parallel ./StressMemLeaksTests
```
+The following command-line keys are available:
+1. --test_conf < path > - path to config with description of arguments
+ used to parametrize tests
+2. --env_conf < path > - path to config with definition of environment values
+ (path to models etc.)
+3. --refs_conf < path > (available for MemCheckTests only) - path to config with references used to
+ compare with results of a run
+4. --collect_results_only < bool > (available for MemCheckTests only) - boolean value that disables comparison and
+   provides memory consumption results only
+
MemCheckTests logs can be used to gather reference values based on current
memory consumption:
_env_config.reset(env_config);
}
+const bool & Environment::getCollectResultsOnly() {
+ return _collect_results_only;
+}
+
+void Environment::setCollectResultsOnly(const bool &collect_results_only) {
+ _collect_results_only = collect_results_only;
+}
+
std::vector<TestCase> generateTestsParams(std::initializer_list<std::string> fields) {
std::vector<TestCase> tests_cases;
const pugi::xml_document & test_config = Environment::Instance().getTestConfig();
private:
pugi::xml_document _test_config;
pugi::xml_document _env_config;
+ bool _collect_results_only = false;
+
Environment() = default;
Environment(const Environment&) = delete;
Environment& operator=(const Environment&) = delete;
void setTestConfig(const pugi::xml_document &test_config);
const pugi::xml_document & getEnvConfig();
void setEnvConfig(const pugi::xml_document &env_config);
+ const bool & getCollectResultsOnly();
+ void setCollectResultsOnly(const bool &collect_results_only);
};
std::vector<TestCase> generateTestsParams(std::initializer_list<std::string> items);
/// env_conf is an optional parameter
DEFINE_string(env_conf, OS_PATH_JOIN({"stress_tests_configs", "memcheck_tests", "env_config.xml"}), env_conf_message);
-/// @brief message for env_config argument
+/// @brief message for refs_config argument
static const char refs_conf_message[] = "Optional. Path to a references config with values of memory consumption per test.";
/// @brief Define parameter for set references' configuration <br>
/// refs_conf is an optional parameter
-DEFINE_string(refs_conf, OS_PATH_JOIN({"stress_tests_configs", "memcheck_tests", "references_config.xml"}), refs_conf_message);
\ No newline at end of file
+DEFINE_string(refs_conf, OS_PATH_JOIN({"stress_tests_configs", "memcheck_tests", "references_config.xml"}), refs_conf_message);
+
+/// @brief message for collect_results_only argument
+static const char collect_results_only_message[] = "Optional. Flag to disable comparison with references and collect memory consumption results only.";
+
+/// @brief Define parameter for mode with collecting results only <br>
+/// collect_results_only is an optional parameter
+DEFINE_bool(collect_results_only, false, collect_results_only_message);
<attributes>
<models>
<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="CPU" vmsize="740214" vmpeak="805110" vmrss="129308" vmhwm="129308" />
- <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="GPU" vmsize="739154" vmpeak="739154" vmrss="346522" vmhwm="346522" />
+ <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="GPU" vmsize="922147" vmpeak="922147" vmrss="587522" vmhwm="587522" />
<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="CPU" vmsize="1007890" vmpeak="1007890" vmrss="138652" vmhwm="138652" />
- <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="GPU" vmsize="824366" vmpeak="909563" vmrss="347167" vmhwm="347167" />
+ <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="GPU" vmsize="1006439" vmpeak="1091636" vmrss="587241" vmhwm="587241" />
<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="CPU" vmsize="691589" vmpeak="922864" vmrss="31054" vmhwm="31054" />
- <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="GPU" vmsize="520530" vmpeak="522740" vmrss="127706" vmhwm="129630" />
+ <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="GPU" vmsize="626194" vmpeak="626194" vmrss="290695" vmhwm="290695" />
<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="CPU" vmsize="958240" vmpeak="1043437" vmrss="31366" vmhwm="31366" />
- <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="GPU" vmsize="605727" vmpeak="690924" vmrss="127753" vmhwm="129537" />
+ <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="GPU" vmsize="708734" vmpeak="793930" vmrss="287877" vmhwm="287877" />
<model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="CPU" vmsize="1046988" vmpeak="1179042" vmrss="307990" vmhwm="439457" />
- <model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="GPU" vmsize="1108775" vmpeak="1126985" vmrss="716341" vmhwm="734578" />
+ <model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="GPU" vmsize="1267775" vmpeak="1279647" vmrss="932672" vmhwm="944626" />
<model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="CPU" vmsize="1321819" vmpeak="1321819" vmrss="374207" vmhwm="439748" />
- <model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="GPU" vmsize="1199957" vmpeak="1285154" vmrss="728046" vmhwm="734593" />
+ <model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="GPU" vmsize="1356565" vmpeak="1441762" vmrss="941418" vmhwm="947060" />
<model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133814" vmpeak="2836412" vmrss="1438049" vmhwm="2140533" />
- <model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2707988" vmpeak="3834209" vmrss="2313022" vmhwm="3439202" />
+ <model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2801422" vmpeak="3915366" vmrss="2465065" vmhwm="3578811" />
<model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2401380" vmpeak="2836412" vmrss="1469832" vmhwm="2140377" />
- <model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2793211" vmpeak="3834235" vmrss="2314192" vmhwm="3439550" />
+ <model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2892432" vmpeak="3939166" vmrss="2472017" vmhwm="3602924" />
</models>
</attributes>
return 0; // TODO return correct status
}
+ Environment::Instance().setCollectResultsOnly(FLAGS_collect_results_only);
pugi::xml_document config;
config.load_file(FLAGS_test_conf.c_str());
Environment::Instance().setTestConfig(config);
#include <gtest/gtest.h>
-#define checkRefVmValues() \
- ASSERT_GT(test_refs.ref_vmsize, 0) << "Reference value of VmSize is less than 0. Value: " << test_refs.ref_vmsize; \
- ASSERT_GT(test_refs.ref_vmsize, 0) << "Reference value of VmPeak is less than 0. Value: " << test_refs.ref_vmpeak; \
- ASSERT_GT(test_refs.ref_vmrss, 0) << "Reference value of VmRSS is less than 0. Value: " << test_refs.ref_vmrss; \
- ASSERT_GT(test_refs.ref_vmrss, 0) << "Reference value of VmHWM is less than 0. Value: " << test_refs.ref_vmhwm;
+#define checkRefVmValues() \
+ if (!Environment::Instance().getCollectResultsOnly()) { \
+ ASSERT_GT(test_refs.ref_vmsize, 0) << "Reference value of VmSize is less than 0. Value: " \
+ << test_refs.ref_vmsize; \
+        ASSERT_GT(test_refs.ref_vmpeak, 0) << "Reference value of VmPeak is less than 0. Value: "                     \
+ << test_refs.ref_vmpeak; \
+ ASSERT_GT(test_refs.ref_vmrss, 0) << "Reference value of VmRSS is less than 0. Value: " \
+ << test_refs.ref_vmrss; \
+        ASSERT_GT(test_refs.ref_vmhwm, 0) << "Reference value of VmHWM is less than 0. Value: "                       \
+ << test_refs.ref_vmhwm; \
+ }
class MemCheckTestSuite : public ::testing::TestWithParam<TestCase> {
};
log_info_ref_mem_usage();
log_info_cur_mem_usage();
- if (test_cur_vmhwm > ref_vmhwm)
+ if ((!Environment::Instance().getCollectResultsOnly()) && (test_cur_vmhwm > ref_vmhwm))
return TestResult(TestStatus::TEST_FAILED,
"Test failed: HWM (peak of RSS) virtual memory consumption is greater than reference.\n"
"Reference HWM of memory consumption: " + std::to_string(ref_vmhwm) + " KB.\n" +
getAlignedVmValues(test_cur_vmsize, test_cur_vmpeak, test_cur_vmrss, test_cur_vmhwm,
vmsize_before_test, vmrss_before_test);
- if (test_cur_vmrss > ref_vmrss) {
+ if ((!Environment::Instance().getCollectResultsOnly()) && (test_cur_vmrss > ref_vmrss)) {
log_debug_ref_record_for_test("infer_request_inference");
return TestResult(TestStatus::TEST_FAILED,
"Test failed: RSS virtual memory consumption became greater than reference "
#include <math.h>
-#include <inference_engine.hpp>
#include <algorithm>
#include <array>
+#include <inference_engine.hpp>
#include <string>
using namespace InferenceEngine;
#define MAX_OUTLIERS 5
// Maximum number of measuring pipeline restarts
#define MAX_RETRY 3
+// Maximum values to compute an average for reference
+#define MAX_AVERAGE 100
+// Size of log line string to pre-allocate
+#define LOG_LINE_RESERVE 1024
// A threshold for which memory growth will be considered an error
#define THRESHOLD 0.1
// Measure values
enum MeasureValue { VMRSS = 0, VMHWM, VMSIZE, VMPEAK, MeasureValueMax };
-namespace util {
+namespace util {
template <typename In, typename Out, typename Func>
void transform(const In& in, Out& out, const Func& func) {
std::transform(std::begin(in), std::end(in), std::begin(out), func);
std::array<bool, MeasureValueMax> outlier = {0}; // flag if current does not fit threshold
std::array<int, MeasureValueMax> outlier_count = {0}; // counter for how many times current does not fit threshold
std::array<float, MeasureValueMax> threshold = {0}; // ref * THRESHOLD
+ std::vector<std::array<long, MeasureValueMax>> past; // past measures
std::string progress_str;
- progress_str.reserve(1024);
+ progress_str.reserve(LOG_LINE_RESERVE);
+    past.resize(std::max(1, std::min(n / 2, MAX_AVERAGE)));  // at least 1 element to avoid modulo-by-zero when n < 2
log_info("Warming up for " << WARMUP_STEPS << " iterations");
log_info("i\tVMRSS\tVMHWM\tVMSIZE\tVMPEAK");
int measure_count = n;
- for (int iteration = 0; measure_count > 0; iteration++) {
+ for (size_t iteration = 0; measure_count > 0; iteration++) {
// Warm up to take reference values
test_pipeline();
getVmValues(cur[VMSIZE], cur[VMPEAK], cur[VMRSS], cur[VMHWM]);
+ past[iteration % past.size()] = cur;
progress_str = std::to_string(iteration + 1) + "\t" + std::to_string(cur[VMRSS]) + "\t" +
std::to_string(cur[VMHWM]) + "\t" + std::to_string(cur[VMSIZE]) + "\t" +
std::to_string(cur[VMPEAK]);
retry_count++;
measure_count = n;
outlier_count = {0};
- ref = cur;
+ // set reference as an average of `past` elements
+ ref = {0};
+ size_t past_size = std::min(iteration + 1, past.size()); // count number of past elements
+ for (size_t i = 0; i < past_size; i++) {
+ // ref = ref + past
+ util::transform(ref, past[i], ref, [](long ref_val, long past_val) -> long {
+ return ref_val + past_val;
+ });
+ }
+ // ref = ref / past_size
+ util::transform(ref, ref, [&past_size](long ref_val) -> float {
+ return ref_val / past_size;
+ });
+ // threshold = THRESHOLD * ref
util::transform(ref, threshold, [](long ref_val) -> float {
return THRESHOLD * ref_val;
});
- log_info("Setting thresholds VMRSS=" << ref[VMRSS] << "(+-" << static_cast<int>(threshold[VMRSS])
- << "), VMHWM=" << ref[VMHWM] << "(+-"
- << static_cast<int>(threshold[VMHWM]) << ")");
+ log_info("Setting thresholds to average of "
+ << past_size << " past elements:"
+ << " VMRSS=" << ref[VMRSS] << "(+-" << static_cast<int>(threshold[VMRSS]) << "),"
+ << " VMHWM=" << ref[VMHWM] << "(+-" << static_cast<int>(threshold[VMHWM]) << ")");
}
measure_count--;
+ // diff = cur - ref
util::transform(cur, ref, diff, [](long cur_val, long ref_val) -> long {
- return labs(cur_val - ref_val);
+ // no labs() here - ignore cur smaller than ref
+ return cur_val - ref_val;
});
+ // outlier = diff > threshold
util::transform(diff, threshold, outlier, [](long diff_val, float threshold_val) -> bool {
return diff_val > threshold_val;
});
+ // outlier_count = outlier_count + (outlier ? 1 : 0)
util::transform(outlier, outlier_count, outlier_count,
[](bool outlier_val, long outlier_count_val) -> long {
return outlier_count_val + (outlier_val ? 1 : 0);
return TestResult(TestStatus::TEST_OK, "");
}
-
-TestResult test_load_unload_plugin(const std::string &target_device, const int &n) {
+TestResult test_load_unload_plugin(const std::string& target_device, const int& n) {
log_info("Load/unload plugin for device: " << target_device << " for " << n << " times");
return common_test_pipeline(load_unload_plugin(target_device), n);
}
-TestResult test_read_network(const std::string &model, const int &n) {
+TestResult test_read_network(const std::string& model, const int& n) {
log_info("Read network: \"" << model << "\" for " << n << " times");
return common_test_pipeline(read_network(model), n);
}
-TestResult test_create_cnnnetwork(const std::string &model, const int &n) {
+TestResult test_create_cnnnetwork(const std::string& model, const int& n) {
log_info("Create CNNNetwork from network: \"" << model << "\" for " << n << " times");
return common_test_pipeline(create_cnnnetwork(model), n);
}
-TestResult test_cnnnetwork_reshape_batch_x2(const std::string &model, const int &n) {
+TestResult test_cnnnetwork_reshape_batch_x2(const std::string& model, const int& n) {
log_info("Reshape to batch*=2 of CNNNetwork created from network: \"" << model << "\" for " << n << " times");
return common_test_pipeline(cnnnetwork_reshape_batch_x2(model), n);
}
-TestResult test_set_input_params(const std::string &model, const int &n) {
+TestResult test_set_input_params(const std::string& model, const int& n) {
log_info("Apply preprocessing for CNNNetwork from network: \"" << model << "\" for " << n << " times");
return common_test_pipeline(set_input_params(model), n);
}
-TestResult test_create_exenetwork(const std::string &model, const std::string &target_device, const int &n) {
- log_info("Create ExecutableNetwork from network: \"" << model
- << "\" for device: \"" << target_device << "\" for " << n
- << " times");
+TestResult test_create_exenetwork(const std::string& model, const std::string& target_device, const int& n) {
+ log_info("Create ExecutableNetwork from network: \"" << model << "\" for device: \"" << target_device << "\" for "
+ << n << " times");
return common_test_pipeline(create_exenetwork(model, target_device), n);
}
-TestResult
-test_recreate_exenetwork(InferenceEngine::Core &ie, const std::string &model, const std::string &target_device,
- const int &n) {
- log_info("Recreate ExecutableNetwork from network within existing InferenceEngine::Core: \"" << model
- << "\" for device: \""
- << target_device
- << "\" for " << n
- << " times");
+TestResult test_recreate_exenetwork(InferenceEngine::Core& ie, const std::string& model,
+ const std::string& target_device, const int& n) {
+ log_info("Recreate ExecutableNetwork from network within existing InferenceEngine::Core: \""
+ << model << "\" for device: \"" << target_device << "\" for " << n << " times");
return common_test_pipeline(recreate_exenetwork(ie, model, target_device), n);
}
-TestResult test_create_infer_request(const std::string &model, const std::string &target_device, const int &n) {
- log_info("Create InferRequest from network: \"" << model
- << "\" for device: \"" << target_device << "\" for " << n
+TestResult test_create_infer_request(const std::string& model, const std::string& target_device, const int& n) {
+ log_info("Create InferRequest from network: \"" << model << "\" for device: \"" << target_device << "\" for " << n
<< " times");
return common_test_pipeline(create_infer_request(model, target_device), n);
}
-TestResult
-test_recreate_infer_request(ExecutableNetwork &network, const std::string &model, const std::string &target_device,
- const int &n) {
- log_info("Create InferRequest from network: \"" << model
- << "\" for device: \"" << target_device << "\" for " << n
+TestResult test_recreate_infer_request(ExecutableNetwork& network, const std::string& model,
+ const std::string& target_device, const int& n) {
+ log_info("Create InferRequest from network: \"" << model << "\" for device: \"" << target_device << "\" for " << n
<< " times");
return common_test_pipeline(recreate_infer_request(network), n);
}
-TestResult
-test_infer_request_inference(const std::string &model, const std::string &target_device, const int &n) {
- log_info("Inference of InferRequest from network: \"" << model
- << "\" for device: \"" << target_device << "\" for " << n
- << " times");
+TestResult test_infer_request_inference(const std::string& model, const std::string& target_device, const int& n) {
+ log_info("Inference of InferRequest from network: \"" << model << "\" for device: \"" << target_device << "\" for "
+ << n << " times");
return common_test_pipeline(infer_request_inference(model, target_device), n);
}
-TestResult
-test_reinfer_request_inference(InferenceEngine::InferRequest &infer_request, InferenceEngine::CNNNetwork &cnnNetwork,
- const std::string &model, const std::string &target_device, const int &n) {
- log_info("Inference of InferRequest from network: \"" << model
- << "\" for device: \"" << target_device << "\" for " << n
- << " times");
+TestResult test_reinfer_request_inference(InferenceEngine::InferRequest& infer_request,
+ InferenceEngine::CNNNetwork& cnnNetwork, const std::string& model,
+ const std::string& target_device, const int& n) {
+ log_info("Inference of InferRequest from network: \"" << model << "\" for device: \"" << target_device << "\" for "
+ << n << " times");
return common_test_pipeline(reinfer_request_inference(infer_request, cnnNetwork), n);
}
import sys
from inspect import getsourcefile
from pathlib import Path
+from xml.etree import ElementTree as ET
log.basicConfig(format="{file}: [ %(levelname)s ] %(message)s".format(file=os.path.basename(__file__)),
level=log.INFO, stream=sys.stdout)
# Parameters
OMZ_NUM_ATTEMPTS = 6
-MODEL_NAMES = 'vgg16,mtcnn-r,mobilenet-ssd,ssd300'
+MODEL_NAMES = ['vgg16', 'mtcnn-r', 'mobilenet-ssd', 'ssd300'] # TODO (vurusovs): remove after merge changes in product-configs
def abs_path(relative_path):
description='Acquire test data',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument('--test_conf', required=False,
+ # TODO (vurusovs): make it required after merge changes in product-configs
+ type=Path,
+ help='Path to a test config .xml file containing models '
+ 'which will be downloaded and converted to IRs via OMZ.')
parser.add_argument('--omz_repo', required=False,
help='Path to Open Model Zoo (OMZ) repository. It will be used to skip cloning step.')
- parser.add_argument('--mo_tool', default='../../model-optimizer/mo.py',
+ parser.add_argument('--mo_tool', type=Path,
+ default=Path('../../../model-optimizer/mo.py').resolve(),
help='Path to Model Optimizer (MO) runner. Required for OMZ converter.py only.')
- parser.add_argument('--omz_models_out_dir', default='../_omz_out/models',
+ parser.add_argument('--omz_models_out_dir', type=Path,
+ default=abs_path('../_omz_out/models'),
help='Directory to put test data into. Required for OMZ downloader.py and converter.py')
- parser.add_argument('--omz_irs_out_dir', default='../_omz_out/irs',
+ parser.add_argument('--omz_irs_out_dir', type=Path,
+ default=abs_path('../_omz_out/irs'),
help='Directory to put test data into. Required for OMZ converter.py only.')
- parser.add_argument('--omz_cache_dir', default='../_omz_out/cache',
+ parser.add_argument('--omz_cache_dir', type=Path,
+ default=abs_path('../_omz_out/cache'),
help='Directory with test data cache. Required for OMZ downloader.py only.')
parser.add_argument('--no_venv', action="store_true",
help='Skip preparation and use of virtual environment to convert models via OMZ converter.py.')
args = parser.parse_args()
- models_out_dir = Path(abs_path(args.omz_models_out_dir))
- irs_out_dir = Path(abs_path(args.omz_irs_out_dir))
- cache_dir = Path(abs_path(args.omz_cache_dir))
- mo_tool = Path(args.mo_tool).resolve()
+
+ # Step 0: prepare models list
+ if not args.test_conf: # TODO (vurusovs): remove after merge changes in product-configs
+ models_names = MODEL_NAMES
+ else:
+ tree = ET.parse(str(args.test_conf))
+ root = tree.getroot()
+ models_names = []
+ for attributes in root:
+ if attributes.tag == "models":
+ models = [child.text for child in attributes]
+ models_names = [Path(model).stem for model in models]
+ break
+
+ models_list_path = Path().resolve() / "models_list.txt"
+ log.info("List of models from {models_list_path} used for downloader.py and converter.py: "
+ "{models_names}".format(models_list_path=models_list_path, models_names=",".join(models_names)))
+ with open(str(models_list_path), "w") as file:
+ file.writelines([name + "\n" for name in models_names])
# Step 1: prepare Open Model Zoo
if args.omz_repo:
# Step 3: prepare models
downloader_path = omz_path / "tools" / "downloader" / "downloader.py"
- cmd = '{downloader_path} --name "{MODEL_NAMES}"' \
+ cmd = '{downloader_path} --list {models_list_path}' \
' --num_attempts {num_attempts}' \
' --output_dir {models_dir}' \
- ' --cache_dir {cache_dir}'.format(downloader_path=downloader_path, MODEL_NAMES=MODEL_NAMES,
+ ' --cache_dir {cache_dir}'.format(downloader_path=downloader_path, models_list_path=models_list_path,
num_attempts=OMZ_NUM_ATTEMPTS,
- models_dir=models_out_dir,
- cache_dir=cache_dir)
+ models_dir=args.omz_models_out_dir,
+ cache_dir=args.omz_cache_dir)
run_in_subprocess(cmd)
# Step 4: prepare virtual environment and install requirements
Venv = VirtualEnv("./.stress_venv")
requirements = [
omz_path / "tools" / "downloader" / "requirements.in",
- mo_tool.parent / "requirements.txt",
- mo_tool.parent / "requirements_dev.txt",
+ args.mo_tool.parent / "requirements.txt",
+ args.mo_tool.parent / "requirements_dev.txt",
# omz_path / "tools" / "downloader" / "requirements-caffe2.in",
# omz_path / "tools" / "downloader" / "requirements-pytorch.in"
]
# Step 5: convert models to IRs
converter_path = omz_path / "tools" / "downloader" / "converter.py"
# NOTE: remove --precision if both precisions (FP32 & FP16) required
- cmd = '{executable} {converter_path} --name "{MODEL_NAMES}"' \
+ cmd = '{executable} {converter_path} --list "{models_list_path}"' \
' -p {executable}' \
' --precision=FP32' \
' --output_dir {irs_dir}' \
' --download_dir {models_dir}' \
' --mo {mo_tool} --jobs {workers_num}'.format(executable=python_executable, converter_path=converter_path,
- MODEL_NAMES=MODEL_NAMES, irs_dir=irs_out_dir,
- models_dir=models_out_dir, mo_tool=mo_tool,
+ models_list_path=models_list_path, irs_dir=args.omz_irs_out_dir,
+ models_dir=args.omz_models_out_dir, mo_tool=args.mo_tool,
workers_num=multiprocessing.cpu_count())
run_in_subprocess(cmd)
--- /dev/null
+#!/usr/bin/env python3
+# Copyright (C) 2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+"""
+Upload metrics gathered by MemCheckTests into Mongo DB
+Usage: ./scripts/memcheck_upload.py https://ci.intel.com/job/memcheck/1234/ \
+ ./gtest-parallel-logs/**/*.log \
+ --artifact_root ./gtest-parallel-logs --dryrun
+"""
+
+import json
+import logging
+from types import SimpleNamespace
+import os
+import re
+import sys
+import argparse
+from glob import glob
+import xml.etree.ElementTree as ET
+import hashlib
+from pymongo import MongoClient
+
+
+# Target MongoDB database name.
+DATABASE = 'memcheck'
+# Matches the "<model .../>" XML line printed into the gtest log.
+RE_GTEST_MODEL_XML = re.compile(r'<model[^>]*>')
+# Header lines of the current/reference memory-consumption tables in the log.
+RE_GTEST_CUR_MEASURE = re.compile(
+    r'Current values of virtual memory consumption')
+RE_GTEST_REF_MEASURE = re.compile(
+    r'Reference values of virtual memory consumption')
+# gtest verdict markers.
+RE_GTEST_PASSED = re.compile(r'\[\s*PASSED\s*\]')
+RE_GTEST_FAILED = re.compile(r'\[\s*FAILED\s*\]')
+# Prefix each measurement table line carries; stripped before parsing.
+GTEST_INFO = '[ INFO ]'
+# NOTE(review): identifier is misspelled ("PRECISSIONS"); kept as-is since it
+# is referenced below in parse_memcheck_log.
+PRECISSIONS = ('FP32', 'FP16', 'INT8')
+# Fields hashed into a record's deterministic `_id` (see create_memcheck_records).
+KEY_FIELDS = ('test_name', 'model', 'device', 'build_url')
+
+
+def globber(paths):
+    """Generator extending paths with wildcards.
+
+    Paths containing glob magic characters are expanded with
+    glob(..., recursive=True) so `**` patterns work; plain paths are
+    yielded unchanged without checking that they exist.
+    """
+    for path in paths:
+        if any(magic in path for magic in ['*', '?', '!', '[', ']']):
+            for resolved in glob(path, recursive=True):
+                yield resolved
+        else:
+            yield path
+
+
+def parse_memcheck_log(log_path):
+ """ Parse memcheck log
+ """
+ with open(log_path, 'r') as log_file:
+ log = log_file.read()
+
+ passed_match = RE_GTEST_PASSED.search(log)
+ failed_match = RE_GTEST_FAILED.search(log)
+ model_match = RE_GTEST_MODEL_XML.search(log)
+ if not model_match:
+ return None
+ model = ET.fromstring(model_match.group(0)).attrib
+
+ log_lines = log.splitlines()
+ for index, line in enumerate(log_lines):
+ if RE_GTEST_REF_MEASURE.search(line):
+ heading = [name.lower() for name in log_lines[index+1]
+ [len(GTEST_INFO):].split()]
+ values = [int(val) for val in log_lines[index+2]
+ [len(GTEST_INFO):].split()]
+ ref_metrics = dict(zip(heading, values))
+ for index in reversed(range(len(log_lines))):
+ if RE_GTEST_CUR_MEASURE.search(log_lines[index]):
+ heading = [name.lower() for name in log_lines[index+1]
+ [len(GTEST_INFO):].split()]
+ values = [int(val) for val in log_lines[index+2]
+ [len(GTEST_INFO):].split()]
+ entry = SimpleNamespace(
+ metrics=dict(zip(heading, values)),
+ test_name=model['test'],
+ model_name=os.path.splitext(
+ os.path.basename(model['path']))[0],
+ precision=next(pr for pr in PRECISSIONS if pr.upper()
+ in model['path'].upper()),
+ model=model['path'],
+ device=model['device'].upper(),
+ status='passed' if passed_match else 'failed' if failed_match else 'started'
+ )
+ if ref_metrics:
+ entry.ref_metrics = ref_metrics
+ return vars(entry)
+ return None
+
+
+def create_memcheck_records(logs, build_url, artifact_root, append=None):
+    """ Parse memcheck logs and create records for MongoDB
+
+    Logs that parse_memcheck_log cannot parse are skipped, so the result
+    may be shorter than `logs`. `append` is an optional dict merged into
+    every record (it may override parsed fields, as it is applied before
+    the `_id` is computed).
+    """
+    records = []
+    for log in logs:
+        data = parse_memcheck_log(log)
+        if not data:
+            continue
+        data['build_url'] = build_url
+        # Store log location relative to the artifact root, not an absolute path.
+        data['log_path'] = os.path.relpath(log, artifact_root)
+        if append:
+            data.update(append)
+
+        # Deterministic id from the key fields makes repeated uploads of the
+        # same run idempotent (upload_memcheck_records upserts by `_id`).
+        data['_id'] = hashlib.sha256(
+            ''.join([str(data[key]) for key in KEY_FIELDS]).encode()).hexdigest()
+        records += [data]
+    return records
+
+
+def upload_memcheck_records(records, db_url, db_collection):
+    """ Upload records created by create_memcheck_records
+
+    Each record is upserted by its `_id`, so re-uploading the same record
+    replaces the previous document instead of duplicating it.
+    """
+    client = MongoClient(db_url)
+    collection = client[DATABASE][db_collection]
+    for record in records:
+        collection.replace_one({'_id': record['_id']}, record, upsert=True)
+
+
+def main():
+ """Main entry point.
+ """
+ parser = argparse.ArgumentParser(
+ description='Upload metrics gathered by memcheck into Mongo DB',
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument('--dryrun', action="store_true",
+ help='Parse logs, not modify database.')
+ is_dryrun = parser.parse_known_args()[0].dryrun
+ parser.add_argument('build_url', help='A place where memcheck execution logs can be found.')
+ parser.add_argument('log', nargs='+', help='Local path to log. Extended wildcards supported.')
+ parser.add_argument('--db_url', required=not is_dryrun,
+ help='MongoDB URL in a for "mongodb://server:port".')
+ parser.add_argument('--db_collection', required=not is_dryrun,
+ help=f'Collection name in {DATABASE} database to upload')
+ parser.add_argument('--artifact_root', required=True,
+ help=f'A root directory to strip from log path before upload.')
+ parser.add_argument('--append', help='JSON to append to each item.')
+ args = parser.parse_args()
+
+ logging.basicConfig(format="{file}: [ %(levelname)s ] %(message)s".format(
+ file=os.path.basename(__file__)), level=logging.INFO, stream=sys.stdout)
+
+ if args.append:
+ with open(args.append, 'r') as append_file:
+ append = json.load(append_file)
+ else:
+ append = None
+
+ logs = list(globber(args.log))
+ records = create_memcheck_records(
+ logs, args.build_url, args.artifact_root, append=append)
+ logging.info('Prepared %d records', len(records))
+ if len(records) != len(logs):
+ logging.warning(
+ 'Skipped %d logs of %d', len(logs) - len(records), len(logs))
+ if not args.dryrun:
+ upload_memcheck_records(records, args.db_url, args.db_collection)
+ logging.info('Uploaded to %s', args.db_url)
+ else:
+ print(json.dumps(records, sort_keys=True, indent=4))
+
+
+if __name__ == "__main__":
+ main()
--- /dev/null
+pymongo
\ No newline at end of file
-t TIME, --time TIME Optional. Time in seconds to execute topology.
-progress [PROGRESS] Optional. Show progress bar (can affect performance
measurement). Default values is "False".
+ -shape SHAPE Optional. Set shape for input. For example,
+ "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in
+ case of one input size.
-nstreams NUMBER_STREAMS, --number_streams NUMBER_STREAMS
Optional. Number of streams to use for inference on the CPU/GPU in throughput mode
(for HETERO and MULTI device cases use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>).
from .utils.constants import MULTI_DEVICE_NAME, HETERO_DEVICE_NAME, CPU_DEVICE_NAME, GPU_DEVICE_NAME, BIN_EXTENSION
from .utils.logging import logger
from .utils.utils import get_duration_seconds
-from .utils.inputs_filling import get_blob_shape
from .utils.statistics_report import StatisticsReport
class Benchmark:
version_string += '{: <9}{:.<24} {}\n'.format('', 'Build', version.build_number)
return version_string
- @staticmethod
- def reshape(ie_network: IENetwork, batch_size: int):
- new_shapes = {}
- for input_layer_name, input_layer in ie_network.inputs.items():
- new_shapes[input_layer_name] = get_blob_shape(input_layer, batch_size)
-
- if new_shapes:
- logger.info('Resizing network to batch = {}'.format(batch_size))
- ie_network.reshape(new_shapes)
-
def set_config(self, config = {}):
for device in config.keys():
self.ie.set_config(config[device], device)
return exe_network
+    def import_network(self, path_to_file : str, config = {}):
+        """Import a pre-compiled network blob from `path_to_file` onto the device.
+
+        Requests one infer request for the 'sync' API type, otherwise the
+        configured nireq (0 lets the plugin decide); then records the number
+        of requests actually created back into self.nireq.
+        NOTE(review): the mutable default `config={}` mirrors set_config's
+        signature; it is only passed through here, never mutated.
+        """
+        exe_network = self.ie.import_network(model_file=path_to_file,
+                                             device_name=self.device,
+                                             config=config,
+                                             num_requests=1 if self.api_type == 'sync' else self.nireq or 0)
+        # Number of requests
+        self.nireq = len(exe_network.requests)
+        return exe_network
+
def infer(self, exe_network, batch_size, progress_bar=None):
progress_count = 0
infer_requests = exe_network.requests
from openvino.tools.benchmark.benchmark import Benchmark
from openvino.tools.benchmark.parameters import parse_args
-from openvino.tools.benchmark.utils.constants import MULTI_DEVICE_NAME, HETERO_DEVICE_NAME, CPU_DEVICE_NAME, GPU_DEVICE_NAME, MYRIAD_DEVICE_NAME, BIN_EXTENSION
+from openvino.tools.benchmark.utils.constants import MULTI_DEVICE_NAME, HETERO_DEVICE_NAME, CPU_DEVICE_NAME, \
+ GPU_DEVICE_NAME, MYRIAD_DEVICE_NAME, BIN_EXTENSION, BLOB_EXTENSION
from openvino.tools.benchmark.utils.inputs_filling import set_inputs
from openvino.tools.benchmark.utils.logging import logger
from openvino.tools.benchmark.utils.progress_bar import ProgressBar
from openvino.tools.benchmark.utils.utils import next_step, config_network_inputs, get_number_iterations, \
process_help_inference_string, print_perf_counters, dump_exec_graph, get_duration_in_milliseconds, \
- get_command_line_arguments, parse_nstreams_value_per_device, parse_devices, load_config, dump_config
+ get_command_line_arguments, parse_nstreams_value_per_device, parse_devices, update_shapes, \
+ adjust_shapes_batch, load_config, dump_config
from openvino.tools.benchmark.utils.statistics_report import StatisticsReport, averageCntReport, detailedCntReport
+
def main():
# ------------------------------ 1. Parsing and validating input arguments -------------------------------------
next_step()
run(parse_args())
+
def run(args):
statistics = None
try:
if args.load_config:
load_config(args.load_config, config)
+ is_network_compiled = False
+ _, ext = os.path.splitext(args.path_to_model)
+
+ if ext == BLOB_EXTENSION:
+ is_network_compiled = True
+ print("Network is compiled")
+
# ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
next_step(step_id=2)
logger.info(version)
- # --------------------- 3. Read the Intermediate Representation of the network ---------------------------------
- next_step()
-
- start_time = datetime.utcnow()
- ie_network = benchmark.read_network(args.path_to_model)
- duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
- logger.info("Read network took {} ms".format(duration_ms))
- if statistics:
- statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
- [
- ('read network time (ms)', duration_ms)
- ])
-
- # --------------------- 4. Resizing network to match image sizes and given batch -------------------------------
-
- next_step()
- if args.batch_size and args.batch_size != ie_network.batch_size:
- benchmark.reshape(ie_network, args.batch_size)
- batch_size = ie_network.batch_size
- logger.info('Network batch size: {}'.format(ie_network.batch_size))
-
- # --------------------- 5. Configuring input of the model ------------------------------------------------------
- next_step()
-
- config_network_inputs(ie_network)
-
- # --------------------- 6. Setting device configuration --------------------------------------------------------
+ # --------------------- 3. Setting device configuration --------------------------------------------------------
next_step()
perf_counts = False
perf_counts = perf_counts
benchmark.set_config(config)
+ batch_size = args.batch_size
+ if not is_network_compiled:
+ # --------------------- 4. Read the Intermediate Representation of the network -----------------------------
+ next_step()
+
+ start_time = datetime.utcnow()
+ ie_network = benchmark.read_network(args.path_to_model)
+ duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
+ logger.info("Read network took {} ms".format(duration_ms))
+ if statistics:
+ statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
+ [
+ ('read network time (ms)', duration_ms)
+ ])
- # --------------------- 7. Loading the model to the device -----------------------------------------------------
+ # --------------------- 5. Resizing network to match image sizes and given batch ---------------------------
+ next_step()
+
+ shapes = {k: v.shape.copy() for k, v in ie_network.inputs.items()}
+ reshape = False
+ if args.shape:
+ reshape |= update_shapes(shapes, args.shape, ie_network.inputs)
+ if args.batch_size and args.batch_size != ie_network.batch_size:
+ reshape |= adjust_shapes_batch(shapes, args.batch_size, ie_network.inputs)
+
+ if reshape:
+ start_time = datetime.utcnow()
+ logger.info(
+ 'Reshaping network: {}'.format(', '.join("'{}': {}".format(k, v) for k, v in shapes.items())))
+ ie_network.reshape(shapes)
+ duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
+ logger.info("Reshape network took {} ms".format(duration_ms))
+ if statistics:
+ statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
+ [
+ ('reshape network time (ms)', duration_ms)
+ ])
+
+ batch_size = ie_network.batch_size
+ logger.info('Network batch size: {}'.format(ie_network.batch_size))
+
+ # --------------------- 6. Configuring input of the model --------------------------------------------------
+ next_step()
+
+ config_network_inputs(ie_network)
+
+ # --------------------- 7. Loading the model to the device -------------------------------------------------
+ next_step()
+
+ start_time = datetime.utcnow()
+ exe_network = benchmark.load_network(ie_network)
+ duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
+ logger.info("Load network took {} ms".format(duration_ms))
+ if statistics:
+ statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
+ [
+ ('load network time (ms)', duration_ms)
+ ])
+ else:
+ next_step()
+ print("Skipping the step for compiled network")
+ next_step()
+ print("Skipping the step for compiled network")
+ next_step()
+ print("Skipping the step for compiled network")
+
+ # --------------------- 7. Loading the model to the device -------------------------------------------------
+ next_step()
+
+ start_time = datetime.utcnow()
+ exe_network = benchmark.import_network(args.path_to_model)
+ duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
+ logger.info("Import network took {} ms".format(duration_ms))
+ if statistics:
+ statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
+ [
+ ('import network time (ms)', duration_ms)
+ ])
+ if batch_size == 0:
+ batch_size = 1
+
+ # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
next_step()
- start_time = datetime.utcnow()
- exe_network = benchmark.load_network(ie_network)
- duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
- logger.info("Load network took {} ms".format(duration_ms))
- if statistics:
- statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
- [
- ('load network time (ms)', duration_ms)
- ])
- ## Update number of streams
+ # Update number of streams
for device in device_number_streams.keys():
key = device + '_THROUGHPUT_STREAMS'
device_number_streams[device] = benchmark.ie.get_config(device, key)
- # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
- next_step()
-
# Number of requests
infer_requests = exe_network.requests
if args.paths_to_input:
for path in args.paths_to_input:
paths_to_input.append(os.path.abspath(*path) if args.paths_to_input else None)
- set_inputs(paths_to_input, batch_size, ie_network.inputs, infer_requests)
+ set_inputs(paths_to_input, batch_size, exe_network.inputs, infer_requests)
if statistics:
statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
import sys,argparse
from fnmatch import fnmatch
-from openvino.tools.benchmark.utils.constants import XML_EXTENSION_PATTERN
+from openvino.tools.benchmark.utils.constants import XML_EXTENSION_PATTERN, BLOB_EXTENSION_PATTERN
from openvino.tools.benchmark.utils.utils import show_available_devices
def str2bool(v):
raise Exception("Number of iterations should be positive (invalid -niter option value)")
if args.number_infer_requests and args.number_infer_requests < 0:
raise Exception("Number of inference requests should be positive (invalid -nireq option value)")
- if not fnmatch(args.path_to_model, XML_EXTENSION_PATTERN):
- raise Exception('Path {} is not xml file.')
+ if not (fnmatch(args.path_to_model, XML_EXTENSION_PATTERN) or fnmatch(args.path_to_model, BLOB_EXTENSION_PATTERN)):
+ raise Exception('Path {} is not xml or blob file.')
class print_help(argparse.Action):
help='Optional. '
'Path to a folder with images and/or binaries or to specific image or binary file.')
args.add_argument('-m', '--path_to_model', type=str, required=True,
- help='Required. Path to an .xml file with a trained model.')
+ help='Required. Path to an .xml file with a trained model or '
+ 'to a .blob file with a trained compiled model.')
args.add_argument('-d', '--target_device', type=str, required=False, default='CPU',
help='Optional. Specify a target device to infer on (the list of available devices is shown below). '
'Default value is CPU. Use \'-d HETERO:<comma separated devices list>\' format to specify HETERO plugin. '
'If not specified, the number of iterations is calculated depending on a device.')
args.add_argument('-nireq', '--number_infer_requests', type=int, required=False, default=None,
help='Optional. Number of infer requests. Default value is determined automatically for device.')
- args.add_argument('-b', '--batch_size', type=int, required=False, default=None,
+ args.add_argument('-b', '--batch_size', type=int, required=False, default=0,
help='Optional. ' +
'Batch size value. ' +
'If not specified, the batch size value is determined from Intermediate Representation')
args.add_argument('-progress', type=str2bool, required=False, default=False, nargs='?', const=True,
help='Optional. '
'Show progress bar (can affect performance measurement). Default values is \'False\'.')
+ args.add_argument('-shape', type=str, required=False, default='',
+ help='Optional. '
+ 'Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size.')
args.add_argument('-nstreams', '--number_streams', type=str, required=False, default=None,
help='Optional. Number of streams to use for inference on the CPU/GPU in throughput mode '
'(for HETERO and MULTI device cases use format <device1>:<nstreams1>,<device2>:<nstreams2> '
XML_EXTENSION = '.xml'
BIN_EXTENSION = '.bin'
+BLOB_EXTENSION = '.blob'
XML_EXTENSION_PATTERN = '*' + XML_EXTENSION
+BLOB_EXTENSION_PATTERN = '*' + BLOB_EXTENSION
IMAGE_EXTENSIONS = ['JPEG', 'JPG', 'PNG', 'BMP']
BINARY_EXTENSIONS = ['BIN']
from .constants import IMAGE_EXTENSIONS, BINARY_EXTENSIONS
from .logging import logger
-
-def get_blob_shape(layer, batch_size: int):
- shape = layer.shape.copy()
- layout = layer.layout
-
- try:
- batch_index = layout.index('N')
- except ValueError:
- batch_index = 1 if layout == 'C' else -1
-
- if batch_index != -1 and shape[batch_index] != batch_size:
- shape[batch_index] = batch_size
-
- return shape
-
-
def is_image(blob):
if blob.layout != "NCHW":
return False
from .logging import logger
import json
+import re
def static_vars(**kwargs):
def decorate(func):
step_names = {
1: "Parsing and validating input arguments",
2: "Loading Inference Engine",
- 3: "Reading the Intermediate Representation network",
- 4: "Resizing network to match image sizes and given batch",
- 5: "Configuring input of the model",
- 6: "Setting device configuration",
+ 3: "Setting device configuration",
+ 4: "Reading the Intermediate Representation network",
+ 5: "Resizing network to match image sizes and given batch",
+ 6: "Configuring input of the model",
7: "Loading the model to the device",
8: "Setting optimal runtime parameters",
9: "Creating infer requests and filling input blobs with images",
parameters.append((arg_name, arg_value))
return parameters
+def update_shapes(shapes, shapes_string: str, inputs_info):
+    """Update the `shapes` dict in place from a -shape CLI string.
+
+    Accepts "name1[d1,d2,...],name2[...]" to set named inputs, or a bare
+    "[d1,d2,...]" to apply one shape to every input (processing stops at
+    the first anonymous entry). Returns True when at least one shape was
+    changed; raises Exception when the string yields no matches at all.
+    NOTE(review): `inputs_info` is currently unused, and named entries are
+    inserted into `shapes` without validating the name — confirm callers
+    pass valid input names.
+    """
+    updated = False
+    matches = re.findall(r'(.*?)\[(.*?)\],?', shapes_string)
+    if matches:
+        for match in matches:
+            input_name = match[0]
+            parsed_shape = [int(dim) for dim in match[1].split(',')]
+            if input_name != '':
+                shapes[input_name] = parsed_shape
+                updated = True
+            else:
+                # Anonymous "[...]" form: same shape for every input.
+                shapes.update({ k:parsed_shape for k in shapes.keys() })
+                updated = True
+                break
+    else:
+        raise Exception("Can't parse `shape` parameter: {}".format(shapes_string))
+    return updated
+
+def adjust_shapes_batch(shapes, batch_size: int, inputs_info):
+    """Force the batch dimension of each input shape to `batch_size`.
+
+    The batch axis is the position of 'N' in the input's layout; inputs
+    whose layout has no 'N' are left untouched. Mutates `shapes` in place
+    and returns True when any dimension was actually changed.
+    """
+    updated = False
+    for name, data in inputs_info.items():
+        layout = data.layout
+        batch_index = layout.index('N') if 'N' in layout else -1
+        if batch_index != -1 and shapes[name][batch_index] != batch_size:
+            shapes[name][batch_index] = batch_size
+            updated = True
+    return updated
+
def show_available_devices():
ie = IECore()
print("\nAvailable target devices: ", (" ".join(ie.available_devices)))